xref: /xnu-12377.1.9/bsd/net/rtsock.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1988, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)rtsock.c	8.5 (Berkeley) 11/2/94
61  */
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kauth.h>
66 #include <sys/kernel.h>
67 #include <sys/proc.h>
68 #include <sys/malloc.h>
69 #include <sys/mbuf.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/domain.h>
73 #include <sys/protosw.h>
74 #include <sys/syslog.h>
75 #include <sys/mcache.h>
76 #include <kern/locks.h>
77 #include <kern/uipc_domain.h>
78 #include <sys/codesign.h>
79 
80 #include <net/if.h>
81 #include <net/route.h>
82 #include <net/dlil.h>
83 #include <net/raw_cb.h>
84 #include <net/net_sysctl.h>
85 
86 #include <netinet/in.h>
87 #include <netinet/in_var.h>
88 #include <netinet/in_arp.h>
89 #include <netinet/ip.h>
90 #include <netinet/ip6.h>
91 #include <netinet6/nd6.h>
92 
93 #include <net/sockaddr_utils.h>
94 
95 #include <IOKit/IOBSD.h>
96 
97 extern struct rtstat_64 rtstat;
98 extern struct domain routedomain_s;
99 static struct domain *routedomain = NULL;
100 
101 static struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
102 static struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
103 static struct sockaddr sa_zero   = { .sa_len = sizeof(sa_zero), .sa_family = AF_INET, .sa_data = { 0, } };
104 
105 struct route_cb {
106 	u_int32_t       ip_count;       /* attached w/ AF_INET */
107 	u_int32_t       ip6_count;      /* attached w/ AF_INET6 */
108 	u_int32_t       any_count;      /* total attached */
109 };
110 
111 static struct route_cb route_cb;
112 
113 struct walkarg {
114 	int     w_tmemsize;
115 	int     w_op, w_arg;
116 	caddr_t w_tmem __sized_by(w_tmemsize);
117 	struct sysctl_req *w_req;
118 };
119 
120 typedef struct walkarg * __single walkarg_ref_t;
121 
122 static void route_dinit(struct domain *);
123 static int rts_abort(struct socket *);
124 static int rts_attach(struct socket *, int, struct proc *);
125 static int rts_bind(struct socket *, struct sockaddr *, struct proc *);
126 static int rts_connect(struct socket *, struct sockaddr *, struct proc *);
127 static int rts_detach(struct socket *);
128 static int rts_disconnect(struct socket *);
129 static int rts_peeraddr(struct socket *, struct sockaddr **);
130 static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *,
131     struct mbuf *, struct proc *);
132 static int rts_shutdown(struct socket *);
133 static int rts_sockaddr(struct socket *, struct sockaddr **);
134 
135 static int route_output(struct mbuf *, struct socket *);
136 static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
137 static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
138 static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
139     struct sockaddr *, unsigned int);
140 static int rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX]);
141 static struct mbuf *rt_msg1(u_char, struct rt_addrinfo *);
142 static int rt_msg2(u_char, struct rt_addrinfo *, caddr_t __indexable, struct walkarg *,
143     kauth_cred_t *);
144 static int sysctl_dumpentry(struct radix_node *rn, void *vw);
145 static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
146 static int sysctl_iflist(int af, struct walkarg *w);
147 static int sysctl_iflist2(int af, struct walkarg *w);
148 static int sysctl_rtstat(struct sysctl_req *);
149 static int sysctl_rtstat_64(struct sysctl_req *);
150 static int sysctl_rttrash(struct sysctl_req *);
151 static int sysctl_rtsock SYSCTL_HANDLER_ARGS;
152 
153 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED,
154     sysctl_rtsock, "");
155 
156 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "routing");
157 
158 /* Align x to 1024 (only power of 2) assuming x is positive */
159 #define ALIGN_BYTES(x) do {                                             \
160 	x = (uint32_t)P2ALIGN(x, 1024);                         \
161 } while(0)
162 
163 #define ROUNDUP32(a)                                                    \
164 	((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) :        \
165 	sizeof (uint32_t))
166 
167 
168 #define RT_HAS_IFADDR(rt)                                               \
169 	((rt)->rt_ifa != NULL && (rt)->rt_ifa->ifa_addr != NULL)
170 
171 /*
172  * It really doesn't make any sense at all for this code to share much
173  * with raw_usrreq.c, since its functionality is so restricted.  XXX
174  */
175 static int
rts_abort(struct socket * so)176 rts_abort(struct socket *so)
177 {
178 	return raw_usrreqs.pru_abort(so);
179 }
180 
181 /* pru_accept is EOPNOTSUPP */
182 
/*
 * Attach a routing socket: allocate the raw PCB, attach it via
 * raw_attach(), bump the per-family listener counters, and mark the
 * socket connected with loopback of its own messages enabled.
 * Called with the socket locked.  Returns 0 or the raw_attach() error.
 */
static int
rts_attach(struct socket *so, int proto, struct proc *p)
{
#pragma unused(p)
	struct rawcb *rp;
	int error;

	VERIFY(so->so_pcb == NULL);

	rp = kalloc_type(struct rawcb, Z_WAITOK_ZERO_NOFAIL);
	so->so_pcb = (caddr_t)rp;
	/* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
	error = raw_attach(so, proto);
	rp = sotorawcb(so);
	if (error) {
		/* undo the allocation and flag the PCB as cleared for teardown */
		kfree_type(struct rawcb, rp);
		so->so_pcb = NULL;
		so->so_flags |= SOF_PCBCLEARING;
		return error;
	}

	/* track listener counts per protocol family (see route_cb) */
	switch (rp->rcb_proto.sp_protocol) {
	case AF_INET:
		os_atomic_inc(&route_cb.ip_count, relaxed);
		break;
	case AF_INET6:
		os_atomic_inc(&route_cb.ip6_count, relaxed);
		break;
	}
	rp->rcb_faddr = &route_src;
	os_atomic_inc(&route_cb.any_count, relaxed);
	/* the socket is already locked when we enter rts_attach */
	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;
	return 0;
}
219 
220 static int
rts_bind(struct socket * so,struct sockaddr * nam,struct proc * p)221 rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
222 {
223 	return raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */
224 }
225 
226 static int
rts_connect(struct socket * so,struct sockaddr * nam,struct proc * p)227 rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
228 {
229 	return raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */
230 }
231 
232 /* pru_connect2 is EOPNOTSUPP */
233 /* pru_control is EOPNOTSUPP */
234 
235 static int
rts_detach(struct socket * so)236 rts_detach(struct socket *so)
237 {
238 	struct rawcb *rp = sotorawcb(so);
239 
240 	VERIFY(rp != NULL);
241 
242 	switch (rp->rcb_proto.sp_protocol) {
243 	case AF_INET:
244 		os_atomic_dec(&route_cb.ip_count, relaxed);
245 		break;
246 	case AF_INET6:
247 		os_atomic_dec(&route_cb.ip6_count, relaxed);
248 		break;
249 	}
250 	os_atomic_dec(&route_cb.any_count, relaxed);
251 	return raw_usrreqs.pru_detach(so);
252 }
253 
254 static int
rts_disconnect(struct socket * so)255 rts_disconnect(struct socket *so)
256 {
257 	return raw_usrreqs.pru_disconnect(so);
258 }
259 
260 /* pru_listen is EOPNOTSUPP */
261 
262 static int
rts_peeraddr(struct socket * so,struct sockaddr ** nam)263 rts_peeraddr(struct socket *so, struct sockaddr **nam)
264 {
265 	return raw_usrreqs.pru_peeraddr(so, nam);
266 }
267 
268 /* pru_rcvd is EOPNOTSUPP */
269 /* pru_rcvoob is EOPNOTSUPP */
270 
271 static int
rts_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)272 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
273     struct mbuf *control, struct proc *p)
274 {
275 	return raw_usrreqs.pru_send(so, flags, m, nam, control, p);
276 }
277 
278 /* pru_sense is null */
279 
280 static int
rts_shutdown(struct socket * so)281 rts_shutdown(struct socket *so)
282 {
283 	return raw_usrreqs.pru_shutdown(so);
284 }
285 
286 static int
rts_sockaddr(struct socket * so,struct sockaddr ** nam)287 rts_sockaddr(struct socket *so, struct sockaddr **nam)
288 {
289 	return raw_usrreqs.pru_sockaddr(so, nam);
290 }
291 
/*
 * User-request dispatch table for PF_ROUTE sockets.  Entries not listed
 * here are zero-initialized by the designated initializer; per the
 * comments above, those operations are unsupported (EOPNOTSUPP) or null.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort =            rts_abort,
	.pru_attach =           rts_attach,
	.pru_bind =             rts_bind,
	.pru_connect =          rts_connect,
	.pru_detach =           rts_detach,
	.pru_disconnect =       rts_disconnect,
	.pru_peeraddr =         rts_peeraddr,
	.pru_send =             rts_send,
	.pru_shutdown =         rts_shutdown,
	.pru_sockaddr =         rts_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
306 
/*
 * View a routing-message byte buffer as a struct rt_msghdr pointer.
 * Centralizes the cast (with -Wcast-align locally silenced) used by
 * route_output(); callers are expected to pass suitably aligned
 * buffers — NOTE(review): alignment relies on the allocator, confirm.
 */
static struct rt_msghdr *
__attribute__((always_inline))
__stateful_pure
_rtm_hdr(caddr_t rtm_data __header_indexable)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"
	return (struct rt_msghdr*)rtm_data;
#pragma clang diagnostic pop
}
317 
/*
 * Process one routing message submitted on a routing socket.
 *
 * Validates the rt_msghdr carried in `m', copies it into a private
 * buffer, performs the requested operation (RTM_ADD / RTM_DELETE /
 * RTM_GET / RTM_CHANGE / RTM_LOCK) under rnh_lock, and then echoes the
 * (possibly rewritten) message back: either only to the requesting
 * socket (RTM_GET_SILENT) or to all routing-socket listeners via
 * raw_input().  Consumes `m' on all paths.
 *
 * Called with `so' locked; the socket lock is dropped while the
 * routing table is manipulated and reacquired before the reply is
 * delivered.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
	size_t rtm_len = 0;
	caddr_t rtm_buf __counted_by(rtm_len) = NULL;
	caddr_t rtm_tmpbuf;
/* RTM: rt_msghdr view of the private copy of the request message */
#define RTM _rtm_hdr(rtm_buf)
	rtentry_ref_t rt = NULL;
	rtentry_ref_t saved_nrt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	struct sockaddr    tiny_sa_storage[RTAX_MAX];
	int len, error = 0;
	sa_family_t dst_sa_family = 0;
	struct ifnet *ifp = NULL;
	struct sockaddr_in dst_in, gate_in;
	int sendonlytoself = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct rawcb *rp = NULL;
	boolean_t is_router = FALSE;
/* record the error and jump to the common reply path below */
#define senderr(e) { error = (e); goto flush; }
	if (m == NULL || ((m->m_len < sizeof(intptr_t)) &&
	    (m = m_pullup(m, sizeof(intptr_t))) == NULL)) {
		return ENOBUFS;
	}
	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * Unlock the socket (but keep a reference) it won't be
	 * accessed until raw_input appends to it.
	 */
	socket_unlock(so, 0);
	lck_mtx_lock(rnh_lock);

	/* The declared message length must match the mbuf chain exactly */
	len = m->m_pkthdr.len;
	if (len < sizeof(*RTM) ||
	    len != mtod(m, struct rt_msghdr_prelude *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	/*
	 * Allocate the buffer for the message. First we allocate
	 * a temporary buffer, and if successful, set the pointers.
	 */
	rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
	if (rtm_tmpbuf == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	rtm_len = (size_t)len;
	rtm_buf = rtm_tmpbuf;
	rtm_tmpbuf = NULL;


	m_copydata(m, 0, len, rtm_buf);

	if (RTM->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}

	/*
	 * Silent version of RTM_GET for Reachability APIs. We may change
	 * all RTM_GETs to be silent in the future, so this is private for now.
	 */
	if (RTM->rtm_type == RTM_GET_SILENT) {
		if (!(so->so_options & SO_USELOOPBACK)) {
			senderr(EINVAL);
		}
		sendonlytoself = 1;
		RTM->rtm_type = RTM_GET;
	}

	/*
	 * Perform permission checking, only privileged sockets
	 * may perform operations other than RTM_GET
	 */
	if (RTM->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPERM);
	}

	RTM->rtm_pid = proc_selfpid();
	info.rti_addrs = RTM->rtm_addrs;

	/* Parse the sockaddrs that follow the header into info.rti_info[] */
	if (rt_xaddrs(rtm_buf + sizeof(struct rt_msghdr), rtm_buf + rtm_len, &info, tiny_sa_storage)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) {
		senderr(EINVAL);
	}

	/*
	 * Normalize a short/odd-length AF_INET destination into a
	 * full-size, zeroed sockaddr_in on the stack.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
	    info.rti_info[RTAX_DST]->sa_len != sizeof(struct sockaddr_in)) {
		/* At minimum, we need up to sin_addr */
		if (info.rti_info[RTAX_DST]->sa_len <
		    offsetof(struct sockaddr_in, sin_zero)) {
			senderr(EINVAL);
		}

		SOCKADDR_ZERO(&dst_in, sizeof(dst_in));
		dst_in.sin_len = sizeof(dst_in);
		dst_in.sin_family = AF_INET;
		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
		info.rti_info[RTAX_DST] = SA(&dst_in);
		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
	} else if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    info.rti_info[RTAX_DST]->sa_len < sizeof(struct sockaddr_in6)) {
		senderr(EINVAL);
	}

	/* Same normalization for the gateway, when one was supplied */
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
		    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof(struct sockaddr_in)) {
			/* At minimum, we need up to sin_addr */
			if (info.rti_info[RTAX_GATEWAY]->sa_len <
			    offsetof(struct sockaddr_in, sin_zero)) {
				senderr(EINVAL);
			}

			SOCKADDR_ZERO(&gate_in, sizeof(gate_in));
			gate_in.sin_len = sizeof(gate_in);
			gate_in.sin_family = AF_INET;
			gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
			gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
			info.rti_info[RTAX_GATEWAY] = SA(&gate_in);
		} else if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET6 &&
		    info.rti_info[RTAX_GATEWAY]->sa_len < sizeof(struct sockaddr_in6)) {
			senderr(EINVAL);
		}
	}

	/*
	 * Intern the genmask in the mask radix tree so the route can
	 * reference the shared copy.
	 */
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		struct sockaddr *genmask = SA(info.rti_info[RTAX_GENMASK]);
		void *genmask_bytes = __SA_UTILS_CONV_TO_BYTES(genmask);
		t = rn_addmask(genmask_bytes, 0, 1);
		if (t != NULL && SOCKADDR_CMP(genmask, rn_get_key(t), genmask->sa_len) == 0) {
			info.rti_info[RTAX_GENMASK] = SA(rn_get_key(t));
		} else {
			senderr(ENOBUFS);
		}
	}

	/*
	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
	 */
	if (RTM->rtm_flags & RTF_IFSCOPE) {
		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
		    info.rti_info[RTAX_DST]->sa_family != AF_INET6) {
			senderr(EINVAL);
		}
		ifscope = RTM->rtm_index;
	}
	/*
	 * Block changes on INTCOPROC interfaces.
	 */
	if (ifscope != IFSCOPE_NONE) {
		unsigned int intcoproc_scope = 0;
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (IFNET_IS_INTCOPROC(ifp)) {
				intcoproc_scope = ifp->if_index;
				break;
			}
		}
		ifnet_head_done();
		if (intcoproc_scope == ifscope && proc_getpid(current_proc()) != 0) {
			senderr(EINVAL);
		}
	}
	/*
	 * Require entitlement to change management interfaces
	 */
	if (management_control_unrestricted == false && if_management_interface_check_needed == true &&
	    ifscope != IFSCOPE_NONE && proc_getpid(current_proc()) != 0) {
		bool is_management = false;

		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(ifscope)) {
			ifp = ifindex2ifnet[ifscope];
			if (ifp != NULL && IFNET_IS_MANAGEMENT(ifp)) {
				is_management = true;
			}
		}
		ifnet_head_done();

		if (is_management && !IOCurrentTaskHasEntitlement(MANAGEMENT_CONTROL_ENTITLEMENT)) {
			senderr(EINVAL);
		}
	}

	/*
	 * RTF_PROXY can only be set internally from within the kernel.
	 */
	if (RTM->rtm_flags & RTF_PROXY) {
		senderr(EINVAL);
	}

	/*
	 * For AF_INET, always zero out the embedded scope ID.  If this is
	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
	 * flag and the corresponding rtm_index value.  This is to prevent
	 * false interpretation of the scope ID because it's using the sin_zero
	 * field, which might not be properly cleared by the requestor.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
	}
	/*
	 * For AF_INET6, hoist a KAME-style embedded scope ID out of the
	 * address bytes into sin6_scope_id when none was given.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    SIN6(info.rti_info[RTAX_DST])->sin6_scope_id == 0) {
		SIN6(info.rti_info[RTAX_DST])->sin6_scope_id = ntohs(SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1]);
		SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1] = 0;
	}

	/* Dispatch on the request type; rnh_lock is held throughout */
	switch (RTM->rtm_type) {
	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL) {
			senderr(EINVAL);
		}

		error = rtrequest_scoped_locked(RTM_ADD,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0 && saved_nrt != NULL) {
			RT_LOCK(saved_nrt);
			/*
			 * If the route request specified an interface with
			 * IFA and/or IFP, we set the requested interface on
			 * the route with rt_setif.  It would be much better
			 * to do this inside rtrequest, but that would
			 * require passing the desired interface, in some
			 * form, to rtrequest.  Since rtrequest is called in
			 * so many places (roughly 40 in our source), adding
			 * a parameter is to much for us to swallow; this is
			 * something for the FreeBSD developers to tackle.
			 * Instead, we let rtrequest compute whatever
			 * interface it wants, then come in behind it and
			 * stick in the interface that we really want.  This
			 * works reasonably well except when rtrequest can't
			 * figure out what interface to use (with
			 * ifa_withroute) and returns ENETUNREACH.  Ideally
			 * it shouldn't matter if rtrequest can't figure out
			 * the interface if we're going to explicitly set it
			 * ourselves anyway.  But practically we can't
			 * recover here because rtrequest will not do any of
			 * the work necessary to add the route if it can't
			 * find an interface.  As long as there is a default
			 * route that leads to some interface, rtrequest will
			 * find an interface, so this problem should be
			 * rarely encountered.
			 * [email protected]
			 */
			rt_setif(saved_nrt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);
			(void)rt_setmetrics(RTM->rtm_inits, &RTM->rtm_rmx, saved_nrt);
			saved_nrt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			saved_nrt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			if ((saved_nrt->rt_flags & (RTF_UP | RTF_LLINFO)) ==
			    (RTF_UP | RTF_LLINFO)) {
				rt_lookup_qset_id(saved_nrt, false);
			}
			RT_REMREF_LOCKED(saved_nrt);
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		error = rtrequest_scoped_locked(RTM_DELETE,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0) {
			/* report the deleted entry back to the caller */
			rt = saved_nrt;
			RT_LOCK(rt);
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL) {
			senderr(EAFNOSUPPORT);
		}
		/*
		 * Lookup the best match based on the key-mask pair;
		 * callee adds a reference and checks for root node.
		 */
		rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
		    info.rti_info[RTAX_NETMASK], rnh, ifscope);
		if (rt == NULL) {
			senderr(ESRCH);
		}
		RT_LOCK(rt);

		/*
		 * Holding rnh_lock here prevents the possibility of
		 * ifa from changing (e.g. in_ifinit), so it is safe
		 * to access its ifa_addr (down below) without locking.
		 */
		switch (RTM->rtm_type) {
		case RTM_GET: {
			kauth_cred_t cred __single;
			kauth_cred_t* credp;
			struct ifaddr *ifa2;
			/*
			 * The code below serves both the `RTM_GET'
			 * and the `RTM_DELETE' requests.
			 */
report:
			cred = current_cached_proc_cred(PROC_NULL);
			credp = &cred;

			ifa2 = NULL;
			RT_LOCK_ASSERT_HELD(rt);
			info.rti_info[RTAX_DST] = rt_key(rt);
			dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (RTM->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp != NULL) {
					ifnet_lock_shared(ifp);
					ifa2 = ifp->if_lladdr;
					info.rti_info[RTAX_IFP] = ifa2->ifa_addr;
					ifa_addref(ifa2);
					ifnet_lock_done(ifp);
					info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
					RTM->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				RTM->rtm_index = ifp->if_index;
			}

			/*
			 * Determine the length required for the routing information
			 * report.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			len = rt_msg2(RTM->rtm_type, &info, NULL, NULL, credp);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Allocate output message for the routing information report.
			 */
			VERIFY(rtm_tmpbuf == NULL);
			rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
			if (rtm_tmpbuf == NULL) {
				RT_UNLOCK(rt);
				if (ifa2 != NULL) {
					ifa_remref(ifa2);
				}
				senderr(ENOBUFS);
			}

			/*
			 * Create the header for the output message, based
			 * on the request message header and the current routing information.
			 */
			struct rt_msghdr *out_rtm = _rtm_hdr(rtm_tmpbuf);
			bcopy(RTM, out_rtm, sizeof(struct rt_msghdr));
			out_rtm->rtm_flags = rt->rt_flags;
			rt_getmetrics(rt, &out_rtm->rtm_rmx);
			out_rtm->rtm_addrs = info.rti_addrs;

			/*
			 * Populate the body of the output message.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			(void) rt_msg2(out_rtm->rtm_type, &info, rtm_tmpbuf,
			    NULL, &cred);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Replace the "main" routing message with the output message
			 * we have constructed.
			 */
			kfree_data_counted_by(rtm_buf, rtm_len);
			rtm_len = len;
			rtm_buf = rtm_tmpbuf;
			rtm_tmpbuf = NULL;

			if (ifa2 != NULL) {
				ifa_remref(ifa2);
			}

			break;
		}

		case RTM_CHANGE:
			is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;

			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
			    info.rti_info[RTAX_GATEWAY]))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			/*
			 * If they tried to change things but didn't specify
			 * the required gateway, then just use the old one.
			 * This can happen if the user tries to change the
			 * flags on the default route without changing the
			 * default gateway. Changing flags still doesn't work.
			 */
			if ((rt->rt_flags & RTF_GATEWAY) &&
			    info.rti_info[RTAX_GATEWAY] == NULL) {
				info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			}

			/*
			 * On Darwin, we call rt_setif which contains the
			 * equivalent to the code found at this very spot
			 * in BSD.
			 */
			rt_setif(rt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);

			if ((error = rt_setmetrics(RTM->rtm_inits,
			    &RTM->rtm_rmx, rt))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			if (info.rti_info[RTAX_GENMASK]) {
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			}

			/*
			 * Enqueue work item to invoke callback for this route entry
			 * This may not be needed always, but for now issue it anytime
			 * RTM_CHANGE gets called.
			 */
			route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, TRUE);
			/*
			 * If the route is for a router, walk the tree to send refresh
			 * event to protocol cloned entries
			 */
			if (is_router) {
				struct route_event rt_ev;
				route_event_init(&rt_ev, rt, NULL, ROUTE_ENTRY_REFRESH);
				RT_UNLOCK(rt);
				(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
				RT_LOCK(rt);
			}
			OS_FALLTHROUGH;
		case RTM_LOCK:
			rt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			break;
		}
		RT_UNLOCK(rt);
		break;
	default:
		senderr(EOPNOTSUPP);
	}
flush:
	/* Reflect the outcome into the message that will be echoed back */
	if (RTM != NULL) {
		if (error) {
			RTM->rtm_errno = error;
		} else {
			RTM->rtm_flags |= RTF_DONE;
		}
	}
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		rtfree_locked(rt);
	}
	lck_mtx_unlock(rnh_lock);

	/* relock the socket now */
	socket_lock(so, 0);
	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			/* no other listeners: nothing to deliver, clean up */
			kfree_data_counted_by(rtm_buf, rtm_len);
			m_freem(m);
			return error;
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	/*
	 * Rewrite the caller's mbuf chain with the reply and trim or drop
	 * it so the chain length matches rtm_msglen.
	 */
	if (rtm_buf != NULL) {
		m_copyback(m, 0, RTM->rtm_msglen, rtm_buf);
		if (m->m_pkthdr.len < RTM->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > RTM->rtm_msglen) {
			m_adj(m, RTM->rtm_msglen - m->m_pkthdr.len);
		}
		kfree_data_counted_by(rtm_buf, rtm_len);
	}
	if (sendonlytoself && m != NULL) {
		/* RTM_GET_SILENT: deliver only to the requesting socket */
		error = 0;
		if (sbappendaddr(&so->so_rcv, &route_src, m,
		    NULL, &error) != 0) {
			sorwakeup(so);
		}
		if (error) {
			return error;
		}
	} else {
		/* Broadcast to all routing-socket listeners via raw_input */
		struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
		if (rp != NULL) {
			rp->rcb_proto.sp_family = 0; /* Avoid us */
		}
		if (dst_sa_family != 0) {
			route_proto.sp_protocol = dst_sa_family;
		}
		if (m != NULL) {
			socket_unlock(so, 0);
			raw_input(m, &route_proto, &route_src, &route_dst);
			socket_lock(so, 0);
		}
		if (rp != NULL) {
			rp->rcb_proto.sp_family = PF_ROUTE;
		}
	}
	return error;
#undef RTM /* was defined to _rtm_hdr(rtm_buf) */
}
876 
877 void
rt_setexpire(struct rtentry * rt,uint64_t expiry)878 rt_setexpire(struct rtentry *rt, uint64_t expiry)
879 {
880 	/* set both rt_expire and rmx_expire */
881 	rt->rt_expire = expiry;
882 	if (expiry) {
883 		rt->rt_rmx.rmx_expire =
884 		    (int32_t)(expiry + rt->base_calendartime -
885 		    rt->base_uptime);
886 	} else {
887 		rt->rt_rmx.rmx_expire = 0;
888 	}
889 }
890 
/*
 * Apply user-supplied metrics to a route entry.
 *
 * `which' is the RTV_* bitmask selecting which fields of `in' to copy
 * into out->rt_rmx.  Two modes:
 *  - normal: copy each requested metric; rmx_expire (calendar time) is
 *    translated into the uptime-based rt_expire via rt_setexpire(),
 *    after compensating the baseline for wall-clock changes.
 *  - RTV_REFRESH_HOST: must be the only bit set, and the route must be
 *    a non-static RTF_LLINFO entry; invokes the route's llinfo refresh
 *    callback instead of copying metrics.
 *
 * Returns 0 on success, EINVAL for a bad refresh request, ENOTSUP when
 * the route has no refresh callback.
 */
static int
rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
{
	if (!(which & RTV_REFRESH_HOST)) {
		struct timeval caltime;
		getmicrotime(&caltime);
/* copy metric field `e' from `in' when flag `f' was requested */
#define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
		metric(RTV_RPIPE, rmx_recvpipe);
		metric(RTV_SPIPE, rmx_sendpipe);
		metric(RTV_SSTHRESH, rmx_ssthresh);
		metric(RTV_RTT, rmx_rtt);
		metric(RTV_RTTVAR, rmx_rttvar);
		metric(RTV_HOPCOUNT, rmx_hopcount);
		metric(RTV_MTU, rmx_mtu);
		metric(RTV_EXPIRE, rmx_expire);
#undef metric
		if (out->rt_rmx.rmx_expire > 0) {
			/* account for system time change */
			getmicrotime(&caltime);
			out->base_calendartime +=
			    NET_CALCULATE_CLOCKSKEW(caltime,
			    out->base_calendartime,
			    net_uptime(), out->base_uptime);
			rt_setexpire(out,
			    out->rt_rmx.rmx_expire -
			    out->base_calendartime +
			    out->base_uptime);
		} else {
			rt_setexpire(out, 0);
		}

		/* invariant: rt_expire and rmx_expire are zero/non-zero together */
		VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
		VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
	} else {
		/* Only RTV_REFRESH_HOST must be set */
		if ((which & ~RTV_REFRESH_HOST) ||
		    (out->rt_flags & RTF_STATIC) ||
		    !(out->rt_flags & RTF_LLINFO)) {
			return EINVAL;
		}

		if (out->rt_llinfo_refresh == NULL) {
			return ENOTSUP;
		}

		out->rt_llinfo_refresh(out);
	}
	return 0;
}
940 
/*
 * Export route `in''s metrics to `out', converting the kernel's
 * uptime-based expiration into calendar time for userland.
 */
static void
rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
{
	struct timeval caltime;

	/* rt_expire and rmx_expire must be both set or both clear. */
	VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
	VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);

	/* Snapshot all metrics; rmx_expire is fixed up below. */
	*out = in->rt_rmx;

	if (in->rt_expire != 0) {
		/* account for system time change */
		getmicrotime(&caltime);

		/*
		 * NOTE(review): this rebases the route's calendar-time base
		 * in place (a write to `in'), so callers are expected to hold
		 * the route exclusively -- confirm at call sites.
		 */
		in->base_calendartime +=
		    NET_CALCULATE_CLOCKSKEW(caltime,
		    in->base_calendartime, net_uptime(), in->base_uptime);

		/* Report expiration in calendar time. */
		out->rmx_expire = (int32_t)(in->base_calendartime +
		    in->rt_expire - in->base_uptime);
	} else {
		out->rmx_expire = 0;
	}
}
965 
/*
 * Set route's interface given info.rti_info[RTAX_IFP],
 * info.rti_info[RTAX_IFA], and gateway.
 *
 * The new (ifa, ifp) pair is derived, in order of preference, from the
 * caller-supplied link-level sockaddr (Ifpaddr), interface address
 * (Ifaaddr), or gateway (Gate).  If the interface address actually
 * changes, the old ifa's RTM_DELETE hook and the new ifa's RTM_ADD hook
 * are invoked and per-interface route accounting is updated.
 *
 * Caller must hold rnh_lock and the route's lock.
 */
static void
rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
    struct sockaddr *Gate, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct ifnet *ifp = NULL;
	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK_ASSERT_HELD(rt);

	/* Don't update a defunct route */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return;
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);

	/*
	 * New gateway could require new ifaddr, ifp; flags may also
	 * be different; ifp may be specified by ll sockaddr when
	 * protocol address is ambiguous.
	 */
	if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
	    (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
		/* ifp identified via Ifpaddr: pick the best ifa on it. */
		ifa_remref(ifa);
		ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
	} else {
		if (ifa != NULL) {
			ifa_remref(ifa);
			ifa = NULL;
		}
		if (Ifpaddr && (ifp = if_withname(Ifpaddr))) {
			if (Gate) {
				ifa = ifaof_ifpforaddr(Gate, ifp);
			} else {
				/* No address hint: use the first ifa. */
				ifnet_lock_shared(ifp);
				ifa = TAILQ_FIRST(&ifp->if_addrhead);
				if (ifa != NULL) {
					ifa_addref(ifa);
				}
				ifnet_lock_done(ifp);
			}
		} else if (Ifaaddr &&
		    (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
			ifp = ifa->ifa_ifp;
		} else if (Gate != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, since holding
			 * rnh_lock here prevents another thread from calling
			 * rt_setgate() on this route.  We cannot hold the
			 * lock across ifa_ifwithroute since the lookup done
			 * by that routine may point to the same route.
			 */
			RT_UNLOCK(rt);
			if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
			    rt_key(rt), Gate, ifscope)) != NULL) {
				ifp = ifa->ifa_ifp;
			}
			RT_LOCK(rt);
			/* Don't update a defunct route */
			if (rt->rt_flags & RTF_CONDEMNED) {
				if (ifa != NULL) {
					ifa_remref(ifa);
				}
				/* Release extra ref */
				RT_REMREF_LOCKED(rt);
				return;
			}
		}
	}

	/* trigger route cache reevaluation */
	if (rt_key(rt)->sa_family == AF_INET) {
		routegenid_inet_update();
	} else if (rt_key(rt)->sa_family == AF_INET6) {
		routegenid_inet6_update();
	}

	if (ifa != NULL) {
		struct ifaddr *oifa = rt->rt_ifa;
		if (oifa != ifa) {
			/* Detach the old ifa: run its RTM_DELETE hook first. */
			if (oifa != NULL) {
				IFA_LOCK_SPIN(oifa);
				ifa_rtrequest = oifa->ifa_rtrequest;
				IFA_UNLOCK(oifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_DELETE, rt, Gate);
				}
			}
			rtsetifa(rt, ifa);

			if (rt->rt_ifp != ifp) {
				/*
				 * Purge any link-layer info caching.
				 */
				if (rt->rt_llinfo_purge != NULL) {
					rt->rt_llinfo_purge(rt);
				}

				/*
				 * Adjust route ref count for the interfaces.
				 */
				if (rt->rt_if_ref_fn != NULL) {
					rt->rt_if_ref_fn(ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
			}
			rt->rt_ifp = ifp;
			/*
			 * If this is the (non-scoped) default route, record
			 * the interface index used for the primary ifscope.
			 */
			if (rt_primary_default(rt, rt_key(rt))) {
				set_primary_ifscope(rt_key(rt)->sa_family,
				    rt->rt_ifp->if_index);
			}
			/*
			 * If rmx_mtu is not locked, update it
			 * to the MTU used by the new interface.
			 */
			if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
				rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
				if (rt_key(rt)->sa_family == AF_INET &&
				    INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
					rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
					/* Further adjust the size for CLAT46 expansion */
					rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
				}
			}

			/* Attach complete: run the new ifa's RTM_ADD hook. */
			if (rt->rt_ifa != NULL) {
				IFA_LOCK_SPIN(rt->rt_ifa);
				ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
				IFA_UNLOCK(rt->rt_ifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_ADD, rt, Gate);
				}
			}
			ifa_remref(ifa);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return;
		}
		/* Same ifa as before: drop the lookup reference. */
		ifa_remref(ifa);
		ifa = NULL;
	}

	/* XXX: to reset gateway to correct value, at RTM_CHANGE */
	if (rt->rt_ifa != NULL) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
		IFA_UNLOCK(rt->rt_ifa);
		if (ifa_rtrequest != NULL) {
			ifa_rtrequest(RTM_ADD, rt, Gate);
		}
	}

	/*
	 * Workaround for local address routes pointing to the loopback
	 * interface added by configd, until <rdar://problem/12970142>.
	 */
	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
	    (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) {
		ifa = ifa_ifwithaddr(rt_key(rt));
		if (ifa != NULL) {
			if (ifa != rt->rt_ifa) {
				rtsetifa(rt, ifa);
			}
			ifa_remref(ifa);
		}
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
}
1151 
1152 /*
1153  * Extract the addresses of the passed sockaddrs.
1154  *
1155  * Do a little sanity checking so as to avoid bad memory references.
1156  * This data is derived straight from userland. Some of the data
1157  * anomalies are unrecoverable; for others we substitute the anomalous
1158  * user data with a sanitized replacement.
1159  *
1160  * Details on the input anomalies:
1161  *
1162  * 1. Unrecoverable input anomalies (retcode == EINVAL)
1163  *    The function returns EINVAL.
1164  *    1.1. Truncated sockaddrs at the end of the user-provided buffer.
1165  *    1.2. Unparseable sockaddr header (`0 < .sa_len && .sa_len < 2').
1166  *    1.3. Sockaddrs that won't fit `struct sockaddr_storage'.
1167  *
1168  * 2. Recoverable input anomalies (retcode == 0):
1169  *    The below anomalies would lead to a malformed `struct sockaddr *'
1170  *    pointers. Any attempt to pass such malformed pointers to a function
1171  *    or to assign those to another variable will cause a trap
1172  *    when the `-fbounds-safety' feature is enabled.
1173  *
1174  *    To mitigate the malformed pointers problem, we substitute the malformed
1175  *    user data with a well-formed sockaddrs.
1176  *
 *    2.1. Sockaddrs with `.sa_len == 0' (a.k.a. "zero-length" sockaddrs).
1178  *         We substitute those with a pointer to the `sa_data' global
1179  *         variable.
1180  *    2.2. Sockaddrs with `.sa_len < 16' (a.k.a. "tiny" sockaddrs).
1181  *         We copy the contents of "tiny" sockaddrs to a location
1182  *         inside the `xtra_storage' parameter, and substitute
1183  *         the pointer into the user-provided data with the location
1184  *         in `xtra_storage'.
1185  */
static int
rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX])
{
	struct sockaddr *sa;
	int i, next_tiny_sa = 0;

	/* Start from a clean slate: scratch storage and output pointers. */
	for (i = 0; i < RTAX_MAX; i++) {
		SOCKADDR_ZERO(&xtra_storage[i], sizeof(struct sockaddr));
	}
	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));

	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		/* Only slots requested via the rti_addrs bitmask are parsed. */
		if ((rtinfo->rti_addrs & (1 << i)) == 0) {
			continue;
		}

		/*
		 * We expect the memory pointed to by `cp' to contain a valid socket address.
		 * However, there are no guarantees that our expectations are correct,
		 * since the buffer is passed from the user-space.
		 * In particular, the socket address may be corrupted or truncated.
		 * If we attempt to interpret the contents of the memory pointed to by `cp'
		 * as a valid socket address, we may end up in a situation where the end
		 * of the presumed socket address exceeds the end of the input buffer:
		 *
		 * +-------------------------------+
		 * | user buffer                   |
		 * +-------------------------------+
		 *                       cp ^ cplim ^
		 *                          +-----------------------+
		 *                          | (struct sockaddr *)cp |
		 *                          +-----------------------+
		 *
		 * In such case, we are likely to panic with the `-fbounds-safety' trap,
		 * while the desired behavior is to return `ENOENT'.
		 *
		 * Because of the above concern, we can not optimistically cast the pointer
		 * `cp' to `struct sockaddr*' until we have validated that the contents
		 * of the memory can be safely interpreted as a socket address.
		 *
		 * Instead, we start by examining the expected length of the socket address,
		 * which is guaranteed to be located at the first byte, and perform several
		 * sanity checks, before interpreting the memory as a valid socket address.
		 */
		uint8_t next_sa_len = *cp;

		/*
		 * Is the user-provided sockaddr truncated?
		 */
		if ((cp + next_sa_len) > cplim) {
			return EINVAL;
		}

		/*
		 * Will the user-provided sockaddr fit the sockaddr storage?
		 */
		if (next_sa_len > sizeof(struct sockaddr_storage)) {
			return EINVAL;
		}

		/*
		 * there are no more.. quit now
		 * If there are more bits, they are in error.
		 * I've seen this. route(1) can evidently generate these.
		 * This causes kernel to core dump.
		 * for compatibility, If we see this, point to a safe address.
		 */
		if (next_sa_len == 0) {
			rtinfo->rti_info[i] = &sa_zero;
			return 0; /* should be EINVAL but for compat */
		}

		/*
		 * Check for the minimal length.
		 */
		if (next_sa_len < offsetof(struct sockaddr, sa_data)) {
			return EINVAL;
		}

		/*
		 * Check whether we are looking at a "tiny" sockaddr,
		 * and if so, copy the contents to the xtra storage.
		 * See the comment to this function for the details
		 * on "tiny" sockaddrs and the xtra storage.
		 */
		if (next_sa_len < sizeof(struct sockaddr)) {
			sa = &xtra_storage[next_tiny_sa++];
			SOCKADDR_COPY(cp, sa, next_sa_len);
		} else {
			sa = SA(cp);
		}

		/*
		 * From this point on we can safely use `sa'.
		 */

		/* accept it */
		rtinfo->rti_info[i] = sa;
		const uint32_t rounded_sa_len = ROUNDUP32(sa->sa_len);
		if (cp + rounded_sa_len > cplim) {
			break;
		} else {
			cp += rounded_sa_len;
			/*
			 * The self-assignment is deliberate: `cp' and `cplim'
			 * form an __ended_by pair, and under -fbounds-safety
			 * both members of the pair are reassigned together
			 * when the pointer advances.  No runtime effect.
			 */
			cplim = cplim;
		}
	}
	return 0;
}
1294 
/*
 * Build a routing-socket message of `type' from the addresses in
 * `rtinfo', returning it in a freshly-allocated mbuf (NULL on
 * allocation failure or length mismatch).  Updates rtinfo->rti_addrs
 * with the bitmask of addresses actually appended.
 */
static struct mbuf *
rt_msg1(u_char type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr_common *rtmh;
	int32_t *rtm_buf; /* int32 to preserve the alignment. */
	struct mbuf *m;
	int i;
	int len, dlen, off;

	/* Header size depends on the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Grab a cluster when the header won't fit a plain mbuf. */
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL) {
		return NULL;
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	rtm_buf = mtod(m, int32_t *);
	bzero(rtm_buf, len);
	/* Append each present sockaddr after the header, 32-bit aligned. */
	off = len;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		static_assert(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		m_copyback(m, off, dlen, __SA_UTILS_CONV_TO_BYTES(sa));
		len = off + dlen;
		off += ROUNDUP32(dlen);
	}
	/*
	 * Sanity check: the chain length must match what we appended
	 * (it may not if m_copyback() could not extend the chain).
	 */
	if (m->m_pkthdr.len != len) {
		m_freem(m);
		return NULL;
	}
	/* Finally fill in the common header. */
	rtmh = (struct rt_msghdr_common *)rtm_buf;
	rtmh->rtm_msglen = (u_short)len;
	rtmh->rtm_version = RTM_VERSION;
	rtmh->rtm_type = type;
	return m;
}
1383 
/*
 * Build a routing-socket message of `type' into the caller-supplied
 * buffer `cp' (when non-NULL), appending each address from `rtinfo'
 * 32-bit aligned after the type-specific header.
 *
 * When `cp' is NULL the first pass only computes the required length;
 * if a walkarg `w' with a pending sysctl request is supplied, its
 * temporary buffer (w_tmem) is (re)allocated to fit and the function
 * restarts once via `again' to emit into it.
 *
 * `credp' is forwarded to rtm_scrub() for gateway/ifp sanitization;
 * a NULL credp leaves link-layer information unscrubbed.
 *
 * Returns the total message length in bytes.
 */
static int
rt_msg2(u_char type, struct rt_addrinfo *rtinfo, caddr_t cp __header_indexable, struct walkarg *w,
    kauth_cred_t* credp)
{
	int i;
	int len, dlen, rlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	/* Header size depends on the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	case RTM_IFINFO2:
		len = sizeof(struct if_msghdr2);
		break;

	case RTM_NEWMADDR2:
		len = sizeof(struct ifma_msghdr2);
		break;

	case RTM_GET_EXT:
		len = sizeof(struct rt_msghdr_ext);
		break;

	case RTM_GET2:
		len = sizeof(struct rt_msghdr2);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	cp0 = cp;
	if (cp0) {
		/* Leave room for the header; it is filled in at the end. */
		cp += len;
	}
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		static_assert(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;
		case RTAX_GATEWAY:
		case RTAX_IFP:
			sa = rtm_scrub(type, i, NULL, sa, &ssbuf,
			    sizeof(ssbuf), credp);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		rlen = ROUNDUP32(dlen);
		if (cp) {
			SOCKADDR_COPY(sa, cp, dlen);
			if (dlen != rlen) {
				/* Zero the alignment padding. */
				bzero(cp + dlen, rlen - dlen);
			}
			cp += rlen;
		}
		len += rlen;
	}
	/* Sizing pass done: allocate a buffer and emit for real. */
	if (cp == NULL && w != NULL && !second_time) {
		walkarg_ref_t rw = w;

		if (rw->w_req != NULL) {
			/* Grow the temporary buffer if the message won't fit. */
			if (rw->w_tmemsize < len) {
				if (rw->w_tmem != NULL) {
					kfree_data_sized_by(rw->w_tmem, rw->w_tmemsize);
				}
				caddr_t new_tmem = (caddr_t)kalloc_data(len, Z_ZERO | Z_WAITOK);
				if (new_tmem != NULL) {
					rw->w_tmemsize = len;
					rw->w_tmem = new_tmem;
				}
			}
			if (rw->w_tmem != NULL) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			}
		}
	}
	if (cp) {
		/* Fill in the common header at the start of the buffer. */
		struct rt_msghdr_common *rtmh = (struct rt_msghdr_common *)(void *)cp0;

		rtmh->rtm_version = RTM_VERSION;
		rtmh->rtm_type = type;
		rtmh->rtm_msglen = (u_short)len;
	}
	return len;
}
1508 
1509 /*
1510  * This routine is called to generate a message from the routing
1511  * socket indicating that a redirect has occurred, a routing lookup
1512  * has failed, or that a protocol has detected timeouts to a particular
1513  * destination.
1514  */
1515 void
rt_missmsg(u_char type,struct rt_addrinfo * rtinfo,int flags,int error)1516 rt_missmsg(u_char type, struct rt_addrinfo *rtinfo, int flags, int error)
1517 {
1518 	struct rt_msghdr_common *rtmh;
1519 	struct mbuf *m;
1520 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1521 	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1522 
1523 	if (route_cb.any_count == 0) {
1524 		return;
1525 	}
1526 	m = rt_msg1(type, rtinfo);
1527 	if (m == NULL) {
1528 		return;
1529 	}
1530 	rtmh = mtod(m, struct rt_msghdr_common *);
1531 	rtmh->rtm_flags = RTF_DONE | flags;
1532 	rtmh->rtm_errno = error;
1533 	rtmh->rtm_addrs = rtinfo->rti_addrs;
1534 	route_proto.sp_family = sa ? sa->sa_family : 0;
1535 	raw_input(m, &route_proto, &route_src, &route_dst);
1536 }
1537 
1538 /*
1539  * This routine is called to generate a message from the routing
1540  * socket indicating that the status of a network interface has changed.
1541  */
void
rt_ifmsg(struct ifnet *ifp)
{
	struct if_msghdr *ifm;
	struct mbuf *m;
	struct rt_addrinfo info;
	struct  sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* No routing-socket listeners: nothing to build. */
	if (route_cb.any_count == 0) {
		return;
	}
	/* RTM_IFINFO carries no sockaddrs. */
	bzero((caddr_t)&info, sizeof(info));
	m = rt_msg1(RTM_IFINFO, &info);
	if (m == NULL) {
		return;
	}
	ifm = mtod(m, struct if_msghdr *);
	ifm->ifm_index = ifp->if_index;
	ifm->ifm_flags = (u_short)ifp->if_flags;
	/* Snapshot interface statistics into the exported layout. */
	if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
	ifm->ifm_addrs = 0;
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1565 
1566 /*
1567  * This is called to generate messages from the routing socket
1568  * indicating a network interface has had addresses associated with it.
1569  * if we ever reverse the logic and replace messages TO the routing
1570  * socket indicate a request to configure interfaces, then it will
1571  * be unnecessary as the routing socket will automatically generate
1572  * copies of it.
1573  *
1574  * Since this is coming from the interface, it is expected that the
1575  * interface will be locked.  Caller must hold rnh_lock and rt_lock.
1576  */
void
rt_newaddrmsg(u_char cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{
	struct rt_addrinfo info;
	struct sockaddr *sa = 0;
	int pass;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifa->ifa_ifp;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/* No routing-socket listeners: nothing to do. */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);
	/*
	 * Two messages are emitted, ordered by command: for RTM_ADD the
	 * address message (RTM_NEWADDR) precedes the route message; for
	 * RTM_DELETE the route message precedes the address message
	 * (RTM_DELADDR).
	 * NOTE(review): assumes cmd is RTM_ADD or RTM_DELETE -- any other
	 * value would reach raw_input() with m == NULL; verify callers.
	 */
	for (pass = 1; pass < 3; pass++) {
		bzero((caddr_t)&info, sizeof(info));
		if ((cmd == RTM_ADD && pass == 1) ||
		    (cmd == RTM_DELETE && pass == 2)) {
			struct ifa_msghdr *ifam;
			u_char ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

			/* Lock ifp for if_lladdr */
			ifnet_lock_shared(ifp);
			IFA_LOCK(ifa);
			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold its
			 * lock.  The link address is always present; it's
			 * never freed.
			 */
			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			if ((m = rt_msg1(ncmd, &info)) == NULL) {
				IFA_UNLOCK(ifa);
				ifnet_lock_done(ifp);
				continue;
			}
			IFA_UNLOCK(ifa);
			ifnet_lock_done(ifp);
			ifam = mtod(m, struct ifa_msghdr *);
			ifam->ifam_index = ifp->if_index;
			IFA_LOCK_SPIN(ifa);
			ifam->ifam_metric = ifa->ifa_metric;
			ifam->ifam_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			ifam->ifam_addrs = info.rti_addrs;
		}
		if ((cmd == RTM_ADD && pass == 2) ||
		    (cmd == RTM_DELETE && pass == 1)) {
			struct rt_msghdr *rtm;

			if (rt == NULL) {
				continue;
			}
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_DST] = sa = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if ((m = rt_msg1(cmd, &info)) == NULL) {
				continue;
			}
			rtm = mtod(m, struct rt_msghdr *);
			rtm->rtm_index = ifp->if_index;
			rtm->rtm_flags |= rt->rt_flags;
			rtm->rtm_errno = error;
			rtm->rtm_addrs = info.rti_addrs;
		}
		/* Tag with the family of the sockaddr used in this pass. */
		route_proto.sp_protocol = sa ? sa->sa_family : 0;
		raw_input(m, &route_proto, &route_src, &route_dst);
	}
}
1654 
1655 /*
1656  * This is the analogue to the rt_newaddrmsg which performs the same
 * function but for multicast group memberships.  This is easier since
1658  * there is no route state to worry about.
1659  */
void
rt_newmaddrmsg(u_char cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* No routing-socket listeners: nothing to do. */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Lock ifp for if_lladdr */
	ifnet_lock_shared(ifp);
	bzero((caddr_t)&info, sizeof(info));
	IFMA_LOCK(ifma);
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	/* lladdr doesn't need lock */
	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;

	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
	    ifma->ifma_ll->ifma_addr : NULL;
	if ((m = rt_msg1(cmd, &info)) == NULL) {
		IFMA_UNLOCK(ifma);
		ifnet_lock_done(ifp);
		return;
	}
	ifmam = mtod(m, struct ifma_msghdr *);
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	/* Tag with the group address family so listeners can filter. */
	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1700 
1701 const char *
rtm2str(int cmd)1702 rtm2str(int cmd)
1703 {
1704 	const char *c __null_terminated = "RTM_?";
1705 
1706 	switch (cmd) {
1707 	case RTM_ADD:
1708 		c = "RTM_ADD";
1709 		break;
1710 	case RTM_DELETE:
1711 		c = "RTM_DELETE";
1712 		break;
1713 	case RTM_CHANGE:
1714 		c = "RTM_CHANGE";
1715 		break;
1716 	case RTM_GET:
1717 		c = "RTM_GET";
1718 		break;
1719 	case RTM_LOSING:
1720 		c = "RTM_LOSING";
1721 		break;
1722 	case RTM_REDIRECT:
1723 		c = "RTM_REDIRECT";
1724 		break;
1725 	case RTM_MISS:
1726 		c = "RTM_MISS";
1727 		break;
1728 	case RTM_LOCK:
1729 		c = "RTM_LOCK";
1730 		break;
1731 	case RTM_OLDADD:
1732 		c = "RTM_OLDADD";
1733 		break;
1734 	case RTM_OLDDEL:
1735 		c = "RTM_OLDDEL";
1736 		break;
1737 	case RTM_RESOLVE:
1738 		c = "RTM_RESOLVE";
1739 		break;
1740 	case RTM_NEWADDR:
1741 		c = "RTM_NEWADDR";
1742 		break;
1743 	case RTM_DELADDR:
1744 		c = "RTM_DELADDR";
1745 		break;
1746 	case RTM_IFINFO:
1747 		c = "RTM_IFINFO";
1748 		break;
1749 	case RTM_NEWMADDR:
1750 		c = "RTM_NEWMADDR";
1751 		break;
1752 	case RTM_DELMADDR:
1753 		c = "RTM_DELMADDR";
1754 		break;
1755 	case RTM_GET_SILENT:
1756 		c = "RTM_GET_SILENT";
1757 		break;
1758 	case RTM_IFINFO2:
1759 		c = "RTM_IFINFO2";
1760 		break;
1761 	case RTM_NEWMADDR2:
1762 		c = "RTM_NEWMADDR2";
1763 		break;
1764 	case RTM_GET2:
1765 		c = "RTM_GET2";
1766 		break;
1767 	case RTM_GET_EXT:
1768 		c = "RTM_GET_EXT";
1769 		break;
1770 	}
1771 
1772 	return c;
1773 }
1774 
1775 /*
1776  * This is used in dumping the kernel table via sysctl().
1777  */
/*
 * Radix-tree walker callback: emit one route entry to the sysctl
 * request carried in `vw' (a struct walkarg), as either an RTM_GET
 * or RTM_GET2 message depending on the requested operation.
 */
static int
sysctl_dumpentry(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;
	kauth_cred_t *credp;

	cred = current_cached_proc_cred(PROC_NULL);
	credp = &cred;

	RT_LOCK(rt);
	/* For flag-filtered dumps, skip routes not matching w_arg. */
	if ((w->w_op == NET_RT_FLAGS || w->w_op == NET_RT_FLAGS_PRIV) &&
	    !(rt->rt_flags & w->w_arg)) {
		goto done;
	}

	/*
	 * If the matching route has RTF_LLINFO set, then we can skip scrubbing the MAC
	 * only if the outgoing interface is not loopback and the process has entitlement
	 * for neighbor cache read.
	 */
	if (w->w_op == NET_RT_FLAGS_PRIV && (rt->rt_flags & RTF_LLINFO)) {
		if (rt->rt_ifp != lo_ifp &&
		    (route_op_entitlement_check(NULL, cred, ROUTE_OP_READ, TRUE) == 0)) {
			credp = NULL; /* NULL cred: rtm_scrub() leaves MAC intact */
		}
	}

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
	if (RT_HAS_IFADDR(rt)) {
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	if (w->w_op != NET_RT_DUMP2) {
		/* NULL cp: rt_msg2() sizes and fills w->w_tmem for us. */
		size = rt_msg2(RTM_GET, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr *rtm =
			    (struct rt_msghdr *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_pid = 0;
			rtm->rtm_seq = 0;
			rtm->rtm_errno = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	} else {
		/* NET_RT_DUMP2: extended format with refcnt/parent flags. */
		size = rt_msg2(RTM_GET2, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr2 *rtm =
			    (struct rt_msghdr2 *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_refcnt = rt->rt_refcnt;
			if (rt->rt_parent) {
				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
			} else {
				rtm->rtm_parentflags = 0;
			}
			rtm->rtm_reserved = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1860 
1861 /*
1862  * This is used for dumping extended information from route entries.
1863  */
/*
 * Radix-tree walker callback: emit one route entry in the extended
 * RTM_GET_EXT format (includes link-quality/reachability info when
 * the route's rt_llinfo_get_ri hook provides it).
 */
static int
sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;

	cred = current_cached_proc_cred(PROC_NULL);

	RT_LOCK(rt);
	/* For flag-filtered dumps, skip routes not matching w_arg. */
	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
		goto done;
	}
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;

	/* NULL cp: rt_msg2() sizes and fills w->w_tmem for us. */
	size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred);
	if (w->w_req != NULL && w->w_tmem != NULL) {
		struct rt_msghdr_ext *ertm =
		    (struct rt_msghdr_ext *)(void *)w->w_tmem;

		ertm->rtm_flags = rt->rt_flags;
		ertm->rtm_use = rt->rt_use;
		rt_getmetrics(rt, &ertm->rtm_rmx);
		ertm->rtm_index = rt->rt_ifp->if_index;
		ertm->rtm_pid = 0;
		ertm->rtm_seq = 0;
		ertm->rtm_errno = 0;
		ertm->rtm_addrs = info.rti_addrs;
		if (rt->rt_llinfo_get_ri == NULL) {
			/* No link info available: report unknown values. */
			bzero(&ertm->rtm_ri, sizeof(ertm->rtm_ri));
			ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
			ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
			ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
		} else {
			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
		}
		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1913 
1914 static boolean_t
should_include_clat46(void)1915 should_include_clat46(void)
1916 {
1917 #define CLAT46_ENTITLEMENT "com.apple.private.route.iflist.include-clat46"
1918 	return IOCurrentTaskHasEntitlement(CLAT46_ENTITLEMENT);
1919 }
1920 
1921 static boolean_t
is_clat46_address(struct ifaddr * ifa)1922 is_clat46_address(struct ifaddr *ifa)
1923 {
1924 	boolean_t       is_clat46 = FALSE;
1925 
1926 	if (ifa->ifa_addr->sa_family == AF_INET6) {
1927 		struct in6_ifaddr *ifa6 = ifatoia6(ifa);
1928 
1929 		is_clat46 = (ifa6->ia6_flags & IN6_IFF_CLAT46) != 0;
1930 	}
1931 	return is_clat46;
1932 }
1933 
1934 /*
1935  * rdar://9307819
1936  * To avoid to call copyout() while holding locks and to cause problems
1937  * in the paging path, sysctl_iflist() and sysctl_iflist2() contstruct
1938  * the list in two passes. In the first pass we compute the total
1939  * length of the data we are going to copyout, then we release
1940  * all locks to allocate a temporary buffer that gets filled
1941  * in the second pass.
1942  *
1943  * Note that we are verifying the assumption that kalloc() returns a buffer
1944  * that is at least 32 bits aligned and that the messages and addresses are
1945  * 32 bits aligned.
1946  */
/*
 * NET_RT_IFLIST handler: emit one RTM_IFINFO message per interface,
 * followed by one RTM_NEWADDR message per interface address, filtered
 * by address family 'af' (0 == all) and by w->w_arg (non-zero ==
 * single interface index).  See the two-pass block comment above.
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct  rt_addrinfo info;
	int     error = 0;
	int     pass = 0;
	size_t  len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
	char    *total_buffer = NULL, *cp = NULL;
	kauth_cred_t cred __single;
	boolean_t include_clat46 = FALSE;
	boolean_t include_clat46_valid = FALSE;

	cred = current_cached_proc_cred(PROC_NULL);

	bzero((caddr_t)&info, sizeof(info));

	/* pass 0 sizes the output; pass 1 fills the buffer allocated between */
	for (pass = 0; pass < 2; pass++) {
		ifnet_head_lock_shared();

		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (error) {
				break;
			}
			/* non-zero w_arg restricts the dump to one interface */
			if (w->w_arg && w->w_arg != ifp->if_index) {
				continue;
			}
			ifnet_lock_shared(ifp);
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold the ifa
			 * lock.  The link address is always present; it's
			 * never freed.
			 */
			ifa = ifp->if_lladdr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred);
			if (pass == 0) {
				if (os_add_overflow(total_len, len, &total_len)) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
			} else {
				struct if_msghdr *ifm;

				/*
				 * The interface list may have grown between
				 * the two passes; bail out rather than run
				 * past the buffer (which was allocated with
				 * some slack; see "total_len >> 3" below).
				 */
				if (current_len + len > total_len) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
				len = rt_msg2(RTM_IFINFO, &info,
				    (caddr_t)cp, NULL, &cred);
				info.rti_info[RTAX_IFP] = NULL;

				ifm = (struct if_msghdr *)(void *)cp;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = (u_short)ifp->if_flags;
				if_data_internal_to_if_data(ifp, &ifp->if_data,
				    &ifm->ifm_data);
				ifm->ifm_addrs = info.rti_addrs;
				/*
				 * <rdar://problem/32940901>
				 * Round bytes only for non-platform
				 */
				if (!csproc_get_platform_binary(w->w_req->p)) {
					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
				}

				cp += len;
				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
				current_len += len;
				VERIFY(current_len <= total_len);
			}
			/* one RTM_NEWADDR per remaining address on this ifp */
			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
				boolean_t is_clat46;

				IFA_LOCK(ifa);
				/* non-zero af restricts to one family */
				if (af && af != ifa->ifa_addr->sa_family) {
					IFA_UNLOCK(ifa);
					continue;
				}
				/*
				 * CLAT46 addresses are hidden unless the
				 * caller holds the entitlement; the check
				 * result is cached for the whole dump.
				 */
				is_clat46 = is_clat46_address(ifa);
				if (is_clat46) {
					if (!include_clat46_valid) {
						include_clat46_valid = TRUE;
						include_clat46 =
						    should_include_clat46();
					}
					if (!include_clat46) {
						IFA_UNLOCK(ifa);
						continue;
					}
				}
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
				    &cred);
				if (pass == 0) {
					if (os_add_overflow(total_len, len, &total_len)) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
				} else {
					struct ifa_msghdr *ifam;

					/* see growth comment above */
					if (current_len + len > total_len) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
					len = rt_msg2(RTM_NEWADDR, &info,
					    (caddr_t)cp, NULL, &cred);

					ifam = (struct ifa_msghdr *)(void *)cp;
					ifam->ifam_index =
					    ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;

					cp += len;
					VERIFY(IS_P2ALIGNED(cp,
					    sizeof(u_int32_t)));
					current_len += len;
					VERIFY(current_len <= total_len);
				}
				IFA_UNLOCK(ifa);
			}
			ifnet_lock_done(ifp);
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			    info.rti_info[RTAX_BRD] = NULL;
		}

		ifnet_head_done();

		if (error != 0) {
			if (error == ENOBUFS) {
				printf("%s: current_len (%lu) + len (%lu) > "
				    "total_len (%lu)\n", __func__, current_len,
				    len, total_len);
			}
			break;
		}

		if (pass == 0) {
			/* Better to return zero length buffer than ENOBUFS */
			if (total_len == 0) {
				total_len = 1;
			}
			/* add 12.5% slack for growth between the two passes */
			total_len += total_len >> 3;
			total_buffer_len = total_len;
			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
			if (total_buffer == NULL) {
				printf("%s: kalloc_data(%lu) failed\n", __func__,
				    total_len);
				error = ENOBUFS;
				break;
			}
			cp = total_buffer;
			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
		} else {
			/* no locks held here; safe to fault in user pages */
			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
			if (error) {
				break;
			}
		}
	}

	if (total_buffer != NULL) {
		kfree_data(total_buffer, total_buffer_len);
	}

	return error;
}
2127 
2128 static int
sysctl_iflist2(int af,struct walkarg * w)2129 sysctl_iflist2(int af, struct walkarg *w)
2130 {
2131 	struct ifnet *ifp;
2132 	struct ifaddr *ifa;
2133 	struct  rt_addrinfo info;
2134 	int     error = 0;
2135 	int     pass = 0;
2136 	size_t  len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
2137 	char    *total_buffer = NULL, *cp = NULL;
2138 	kauth_cred_t cred __single;
2139 	boolean_t include_clat46 = FALSE;
2140 	boolean_t include_clat46_valid = FALSE;
2141 
2142 	cred = current_cached_proc_cred(PROC_NULL);
2143 
2144 	bzero((caddr_t)&info, sizeof(info));
2145 
2146 	for (pass = 0; pass < 2; pass++) {
2147 		struct ifmultiaddr *ifma;
2148 
2149 		ifnet_head_lock_shared();
2150 
2151 		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
2152 			if (error) {
2153 				break;
2154 			}
2155 			if (w->w_arg && w->w_arg != ifp->if_index) {
2156 				continue;
2157 			}
2158 			ifnet_lock_shared(ifp);
2159 			/*
2160 			 * Holding ifnet lock here prevents the link address
2161 			 * from changing contents, so no need to hold the ifa
2162 			 * lock.  The link address is always present; it's
2163 			 * never freed.
2164 			 */
2165 			ifa = ifp->if_lladdr;
2166 			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2167 			len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred);
2168 			if (pass == 0) {
2169 				if (os_add_overflow(total_len, len, &total_len)) {
2170 					ifnet_lock_done(ifp);
2171 					error = ENOBUFS;
2172 					break;
2173 				}
2174 			} else {
2175 				struct if_msghdr2 *ifm;
2176 
2177 				if (current_len + len > total_len) {
2178 					ifnet_lock_done(ifp);
2179 					error = ENOBUFS;
2180 					break;
2181 				}
2182 				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2183 				len = rt_msg2(RTM_IFINFO2, &info,
2184 				    (caddr_t)cp, NULL, &cred);
2185 				info.rti_info[RTAX_IFP] = NULL;
2186 
2187 				ifm = (struct if_msghdr2 *)(void *)cp;
2188 				ifm->ifm_addrs = info.rti_addrs;
2189 				ifm->ifm_flags = (u_short)ifp->if_flags;
2190 				ifm->ifm_index = ifp->if_index;
2191 				ifm->ifm_snd_len = IFCQ_LEN(ifp->if_snd);
2192 				ifm->ifm_snd_maxlen = IFCQ_MAXLEN(ifp->if_snd);
2193 				ifm->ifm_snd_drops =
2194 				    (int)ifp->if_snd->ifcq_dropcnt.packets;
2195 				ifm->ifm_timer = ifp->if_timer;
2196 				if_data_internal_to_if_data64(ifp,
2197 				    &ifp->if_data, &ifm->ifm_data);
2198 				/*
2199 				 * <rdar://problem/32940901>
2200 				 * Round bytes only for non-platform
2201 				 */
2202 				if (!csproc_get_platform_binary(w->w_req->p)) {
2203 					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
2204 					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
2205 				}
2206 
2207 				cp += len;
2208 				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2209 				current_len += len;
2210 				VERIFY(current_len <= total_len);
2211 			}
2212 			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
2213 				boolean_t is_clat46;
2214 
2215 				IFA_LOCK(ifa);
2216 				if (af && af != ifa->ifa_addr->sa_family) {
2217 					IFA_UNLOCK(ifa);
2218 					continue;
2219 				}
2220 				is_clat46 = is_clat46_address(ifa);
2221 				if (is_clat46) {
2222 					if (!include_clat46_valid) {
2223 						include_clat46_valid = TRUE;
2224 						include_clat46 =
2225 						    should_include_clat46();
2226 					}
2227 					if (!include_clat46) {
2228 						IFA_UNLOCK(ifa);
2229 						continue;
2230 					}
2231 				}
2232 				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2233 				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2234 				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2235 				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
2236 				    &cred);
2237 				if (pass == 0) {
2238 					if (os_add_overflow(total_len, len, &total_len)) {
2239 						IFA_UNLOCK(ifa);
2240 						error = ENOBUFS;
2241 						break;
2242 					}
2243 				} else {
2244 					struct ifa_msghdr *ifam;
2245 
2246 					if (current_len + len > total_len) {
2247 						IFA_UNLOCK(ifa);
2248 						error = ENOBUFS;
2249 						break;
2250 					}
2251 					len = rt_msg2(RTM_NEWADDR, &info,
2252 					    (caddr_t)cp, NULL, &cred);
2253 
2254 					ifam = (struct ifa_msghdr *)(void *)cp;
2255 					ifam->ifam_index =
2256 					    ifa->ifa_ifp->if_index;
2257 					ifam->ifam_flags = ifa->ifa_flags;
2258 					ifam->ifam_metric = ifa->ifa_metric;
2259 					ifam->ifam_addrs = info.rti_addrs;
2260 
2261 					cp += len;
2262 					VERIFY(IS_P2ALIGNED(cp,
2263 					    sizeof(u_int32_t)));
2264 					current_len += len;
2265 					VERIFY(current_len <= total_len);
2266 				}
2267 				IFA_UNLOCK(ifa);
2268 			}
2269 			if (error) {
2270 				ifnet_lock_done(ifp);
2271 				break;
2272 			}
2273 
2274 			for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
2275 			    ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
2276 				struct ifaddr *ifa0;
2277 
2278 				IFMA_LOCK(ifma);
2279 				if (af && af != ifma->ifma_addr->sa_family) {
2280 					IFMA_UNLOCK(ifma);
2281 					continue;
2282 				}
2283 				bzero((caddr_t)&info, sizeof(info));
2284 				info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2285 				/*
2286 				 * Holding ifnet lock here prevents the link
2287 				 * address from changing contents, so no need
2288 				 * to hold the ifa0 lock.  The link address is
2289 				 * always present; it's never freed.
2290 				 */
2291 				ifa0 = ifp->if_lladdr;
2292 				info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
2293 				if (ifma->ifma_ll != NULL) {
2294 					info.rti_info[RTAX_GATEWAY] =
2295 					    ifma->ifma_ll->ifma_addr;
2296 				}
2297 				len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL,
2298 				    &cred);
2299 				if (pass == 0) {
2300 					total_len += len;
2301 				} else {
2302 					struct ifma_msghdr2 *ifmam;
2303 
2304 					if (current_len + len > total_len) {
2305 						IFMA_UNLOCK(ifma);
2306 						error = ENOBUFS;
2307 						break;
2308 					}
2309 					len = rt_msg2(RTM_NEWMADDR2, &info,
2310 					    (caddr_t)cp, NULL, &cred);
2311 
2312 					ifmam =
2313 					    (struct ifma_msghdr2 *)(void *)cp;
2314 					ifmam->ifmam_addrs = info.rti_addrs;
2315 					ifmam->ifmam_flags = 0;
2316 					ifmam->ifmam_index =
2317 					    ifma->ifma_ifp->if_index;
2318 					ifmam->ifmam_refcount =
2319 					    ifma->ifma_reqcnt;
2320 
2321 					cp += len;
2322 					VERIFY(IS_P2ALIGNED(cp,
2323 					    sizeof(u_int32_t)));
2324 					current_len += len;
2325 				}
2326 				IFMA_UNLOCK(ifma);
2327 			}
2328 			ifnet_lock_done(ifp);
2329 			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2330 			    info.rti_info[RTAX_BRD] = NULL;
2331 		}
2332 		ifnet_head_done();
2333 
2334 		if (error) {
2335 			if (error == ENOBUFS) {
2336 				printf("%s: current_len (%lu) + len (%lu) > "
2337 				    "total_len (%lu)\n", __func__, current_len,
2338 				    len, total_len);
2339 			}
2340 			break;
2341 		}
2342 
2343 		if (pass == 0) {
2344 			/* Better to return zero length buffer than ENOBUFS */
2345 			if (total_len == 0) {
2346 				total_len = 1;
2347 			}
2348 			total_len += total_len >> 3;
2349 			total_buffer_len = total_len;
2350 			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
2351 			if (total_buffer == NULL) {
2352 				printf("%s: kalloc_data(%lu) failed\n", __func__,
2353 				    total_len);
2354 				error = ENOBUFS;
2355 				break;
2356 			}
2357 			cp = total_buffer;
2358 			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2359 		} else {
2360 			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
2361 			if (error) {
2362 				break;
2363 			}
2364 		}
2365 	}
2366 
2367 	if (total_buffer != NULL) {
2368 		kfree_data(total_buffer, total_buffer_len);
2369 	}
2370 
2371 	return error;
2372 }
2373 
2374 
2375 static int
sysctl_rtstat(struct sysctl_req * req)2376 sysctl_rtstat(struct sysctl_req *req)
2377 {
2378 	struct rtstat rtstat_compat = { 0 };
2379 
2380 #define RTSTAT_COMPAT(_field) rtstat_compat._field = rtstat._field < SHRT_MAX ? (short)rtstat._field : SHRT_MAX
2381 	RTSTAT_COMPAT(rts_badredirect);
2382 	RTSTAT_COMPAT(rts_dynamic);
2383 	RTSTAT_COMPAT(rts_newgateway);
2384 	RTSTAT_COMPAT(rts_unreach);
2385 	RTSTAT_COMPAT(rts_wildcard);
2386 	RTSTAT_COMPAT(rts_badrtgwroute);
2387 #undef RTSTAT_TO_COMPAT
2388 
2389 	return SYSCTL_OUT(req, &rtstat_compat, sizeof(struct rtstat));
2390 }
2391 
/*
 * NET_RT_STAT_64 handler: copy out the global 'rtstat' counters using
 * the full 64-bit struct rtstat_64 layout (no clamping, unlike
 * sysctl_rtstat()).
 */
static int
sysctl_rtstat_64(struct sysctl_req *req)
{
	return SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat_64));
}
2397 
/*
 * NET_RT_TRASH handler: copy out the global 'rttrash' counter
 * (routing entries not linked to the routing table; see its
 * definition elsewhere in this file).
 */
static int
sysctl_rttrash(struct sysctl_req *req)
{
	return SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
}
2403 
2404 static int
2405 sysctl_rtsock SYSCTL_HANDLER_ARGS
2406 {
2407 #pragma unused(oidp)
2408 	DECLARE_SYSCTL_HANDLER_ARG_ARRAY(int, 4, name, namelen);
2409 	struct radix_node_head *rnh;
2410 	int     i, error = EINVAL;
2411 	u_char  af;
2412 	struct  walkarg w;
2413 
2414 	name++;
2415 	namelen--;
2416 	if (req->newptr) {
2417 		return EPERM;
2418 	}
2419 	af = (u_char)name[0];
2420 	Bzero(&w, sizeof(w));
2421 	w.w_op = name[1];
2422 	w.w_arg = name[2];
2423 	w.w_req = req;
2424 
2425 	switch (w.w_op) {
2426 	case NET_RT_DUMP:
2427 	case NET_RT_DUMP2:
2428 	case NET_RT_FLAGS:
2429 	case NET_RT_FLAGS_PRIV:
2430 		lck_mtx_lock(rnh_lock);
2431 		for (i = 1; i <= AF_MAX; i++) {
2432 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2433 			    (error = rnh->rnh_walktree(rnh,
2434 			    sysctl_dumpentry, &w))) {
2435 				break;
2436 			}
2437 		}
2438 		lck_mtx_unlock(rnh_lock);
2439 		break;
2440 	case NET_RT_DUMPX:
2441 	case NET_RT_DUMPX_FLAGS:
2442 		lck_mtx_lock(rnh_lock);
2443 		for (i = 1; i <= AF_MAX; i++) {
2444 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2445 			    (error = rnh->rnh_walktree(rnh,
2446 			    sysctl_dumpentry_ext, &w))) {
2447 				break;
2448 			}
2449 		}
2450 		lck_mtx_unlock(rnh_lock);
2451 		break;
2452 	case NET_RT_IFLIST:
2453 		error = sysctl_iflist(af, &w);
2454 		break;
2455 	case NET_RT_IFLIST2:
2456 		error = sysctl_iflist2(af, &w);
2457 		break;
2458 	case NET_RT_STAT:
2459 		error = sysctl_rtstat(req);
2460 		break;
2461 	case NET_RT_STAT_64:
2462 		error = sysctl_rtstat_64(req);
2463 		break;
2464 	case NET_RT_TRASH:
2465 		error = sysctl_rttrash(req);
2466 		break;
2467 	}
2468 	if (w.w_tmem != NULL) {
2469 		kfree_data_sized_by(w.w_tmem, w.w_tmemsize);
2470 	}
2471 	return error;
2472 }
2473 
2474 /*
2475  * Definitions of protocols supported in the ROUTE domain.
2476  */
/* Single raw-socket protocol entry for the PF_ROUTE domain. */
static struct protosw routesw[] = {
	{
		.pr_type =              SOCK_RAW,
		.pr_protocol =          0,
		.pr_flags =             PR_ATOMIC | PR_ADDR,
		.pr_output =            route_output,
		.pr_ctlinput =          raw_ctlinput,
		.pr_usrreqs =           &route_usrreqs,
	}
};

/* Number of entries in routesw[]; iterated by route_dinit() below. */
static int route_proto_count = (sizeof(routesw) / sizeof(struct protosw));
2489 
/* PF_ROUTE domain descriptor; route_dinit() runs at domain init time. */
struct domain routedomain_s = {
	.dom_family =           PF_ROUTE,
	.dom_name =             "route",
	.dom_init =             route_dinit,
};
2495 
2496 static void
route_dinit(struct domain * dp)2497 route_dinit(struct domain *dp)
2498 {
2499 	struct protosw *pr;
2500 	int i;
2501 
2502 	VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
2503 	VERIFY(routedomain == NULL);
2504 
2505 	routedomain = dp;
2506 
2507 	for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) {
2508 		net_add_proto(pr, dp, 1);
2509 	}
2510 
2511 	route_init();
2512 }
2513