xref: /xnu-11417.140.69/bsd/net/rtsock.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1988, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)rtsock.c	8.5 (Berkeley) 11/2/94
61  */
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kauth.h>
66 #include <sys/kernel.h>
67 #include <sys/proc.h>
68 #include <sys/malloc.h>
69 #include <sys/mbuf.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/domain.h>
73 #include <sys/protosw.h>
74 #include <sys/syslog.h>
75 #include <sys/mcache.h>
76 #include <kern/locks.h>
77 #include <sys/codesign.h>
78 
79 #include <net/if.h>
80 #include <net/route.h>
81 #include <net/dlil.h>
82 #include <net/raw_cb.h>
83 #include <net/net_sysctl.h>
84 
85 #include <netinet/in.h>
86 #include <netinet/in_var.h>
87 #include <netinet/in_arp.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip6.h>
90 #include <netinet6/nd6.h>
91 
92 #include <net/sockaddr_utils.h>
93 
94 #include <IOKit/IOBSD.h>
95 
96 extern struct rtstat_64 rtstat;
97 extern struct domain routedomain_s;
98 static struct domain *routedomain = NULL;
99 
100 static struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
101 static struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
102 static struct sockaddr sa_zero   = { .sa_len = sizeof(sa_zero), .sa_family = AF_INET, .sa_data = { 0, } };
103 
/*
 * Per-address-family counts of routing sockets currently attached.
 * Updated with relaxed atomics from rts_attach()/rts_detach(); any_count
 * is consulted in route_output() to decide whether any other listener
 * needs a copy of the message.
 */
struct route_cb {
	u_int32_t       ip_count;       /* attached w/ AF_INET */
	u_int32_t       ip6_count;      /* attached w/ AF_INET6 */
	u_int32_t       any_count;      /* total attached */
};
109 
110 static struct route_cb route_cb;
111 
/*
 * State threaded through the routing sysctl walkers (sysctl_dumpentry,
 * sysctl_iflist, ...): the sysctl request being serviced plus a scratch
 * buffer of w_tmemsize bytes used while formatting each message.
 */
struct walkarg {
	int     w_tmemsize;             /* size of the w_tmem scratch buffer */
	int     w_op, w_arg;            /* sysctl operation and its argument */
	caddr_t w_tmem __sized_by(w_tmemsize);  /* scratch message buffer */
	struct sysctl_req *w_req;       /* sysctl request being serviced */
};
118 
119 typedef struct walkarg * __single walkarg_ref_t;
120 
121 static void route_dinit(struct domain *);
122 static int rts_abort(struct socket *);
123 static int rts_attach(struct socket *, int, struct proc *);
124 static int rts_bind(struct socket *, struct sockaddr *, struct proc *);
125 static int rts_connect(struct socket *, struct sockaddr *, struct proc *);
126 static int rts_detach(struct socket *);
127 static int rts_disconnect(struct socket *);
128 static int rts_peeraddr(struct socket *, struct sockaddr **);
129 static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *,
130     struct mbuf *, struct proc *);
131 static int rts_shutdown(struct socket *);
132 static int rts_sockaddr(struct socket *, struct sockaddr **);
133 
134 static int route_output(struct mbuf *, struct socket *);
135 static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
136 static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
137 static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
138     struct sockaddr *, unsigned int);
139 static int rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX]);
140 static struct mbuf *rt_msg1(u_char, struct rt_addrinfo *);
141 static int rt_msg2(u_char, struct rt_addrinfo *, caddr_t __indexable, struct walkarg *,
142     kauth_cred_t *);
143 static int sysctl_dumpentry(struct radix_node *rn, void *vw);
144 static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
145 static int sysctl_iflist(int af, struct walkarg *w);
146 static int sysctl_iflist2(int af, struct walkarg *w);
147 static int sysctl_rtstat(struct sysctl_req *);
148 static int sysctl_rtstat_64(struct sysctl_req *);
149 static int sysctl_rttrash(struct sysctl_req *);
150 static int sysctl_rtsock SYSCTL_HANDLER_ARGS;
151 
152 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED,
153     sysctl_rtsock, "");
154 
155 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "routing");
156 
157 /* Align x to 1024 (only power of 2) assuming x is positive */
158 #define ALIGN_BYTES(x) do {                                             \
159 	x = (uint32_t)P2ALIGN(x, 1024);                         \
160 } while(0)
161 
162 #define ROUNDUP32(a)                                                    \
163 	((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) :        \
164 	sizeof (uint32_t))
165 
166 
167 #define RT_HAS_IFADDR(rt)                                               \
168 	((rt)->rt_ifa != NULL && (rt)->rt_ifa->ifa_addr != NULL)
169 
170 /*
171  * It really doesn't make any sense at all for this code to share much
172  * with raw_usrreq.c, since its functionality is so restricted.  XXX
173  */
174 static int
rts_abort(struct socket * so)175 rts_abort(struct socket *so)
176 {
177 	return raw_usrreqs.pru_abort(so);
178 }
179 
180 /* pru_accept is EOPNOTSUPP */
181 
182 static int
rts_attach(struct socket * so,int proto,struct proc * p)183 rts_attach(struct socket *so, int proto, struct proc *p)
184 {
185 #pragma unused(p)
186 	struct rawcb *rp;
187 	int error;
188 
189 	VERIFY(so->so_pcb == NULL);
190 
191 	rp = kalloc_type(struct rawcb, Z_WAITOK_ZERO_NOFAIL);
192 	so->so_pcb = (caddr_t)rp;
193 	/* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
194 	error = raw_attach(so, proto);
195 	rp = sotorawcb(so);
196 	if (error) {
197 		kfree_type(struct rawcb, rp);
198 		so->so_pcb = NULL;
199 		so->so_flags |= SOF_PCBCLEARING;
200 		return error;
201 	}
202 
203 	switch (rp->rcb_proto.sp_protocol) {
204 	case AF_INET:
205 		os_atomic_inc(&route_cb.ip_count, relaxed);
206 		break;
207 	case AF_INET6:
208 		os_atomic_inc(&route_cb.ip6_count, relaxed);
209 		break;
210 	}
211 	rp->rcb_faddr = &route_src;
212 	os_atomic_inc(&route_cb.any_count, relaxed);
213 	/* the socket is already locked when we enter rts_attach */
214 	soisconnected(so);
215 	so->so_options |= SO_USELOOPBACK;
216 	return 0;
217 }
218 
219 static int
rts_bind(struct socket * so,struct sockaddr * nam,struct proc * p)220 rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
221 {
222 	return raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */
223 }
224 
225 static int
rts_connect(struct socket * so,struct sockaddr * nam,struct proc * p)226 rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
227 {
228 	return raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */
229 }
230 
231 /* pru_connect2 is EOPNOTSUPP */
232 /* pru_control is EOPNOTSUPP */
233 
234 static int
rts_detach(struct socket * so)235 rts_detach(struct socket *so)
236 {
237 	struct rawcb *rp = sotorawcb(so);
238 
239 	VERIFY(rp != NULL);
240 
241 	switch (rp->rcb_proto.sp_protocol) {
242 	case AF_INET:
243 		os_atomic_dec(&route_cb.ip_count, relaxed);
244 		break;
245 	case AF_INET6:
246 		os_atomic_dec(&route_cb.ip6_count, relaxed);
247 		break;
248 	}
249 	os_atomic_dec(&route_cb.any_count, relaxed);
250 	return raw_usrreqs.pru_detach(so);
251 }
252 
253 static int
rts_disconnect(struct socket * so)254 rts_disconnect(struct socket *so)
255 {
256 	return raw_usrreqs.pru_disconnect(so);
257 }
258 
259 /* pru_listen is EOPNOTSUPP */
260 
261 static int
rts_peeraddr(struct socket * so,struct sockaddr ** nam)262 rts_peeraddr(struct socket *so, struct sockaddr **nam)
263 {
264 	return raw_usrreqs.pru_peeraddr(so, nam);
265 }
266 
267 /* pru_rcvd is EOPNOTSUPP */
268 /* pru_rcvoob is EOPNOTSUPP */
269 
270 static int
rts_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)271 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
272     struct mbuf *control, struct proc *p)
273 {
274 	return raw_usrreqs.pru_send(so, flags, m, nam, control, p);
275 }
276 
277 /* pru_sense is null */
278 
279 static int
rts_shutdown(struct socket * so)280 rts_shutdown(struct socket *so)
281 {
282 	return raw_usrreqs.pru_shutdown(so);
283 }
284 
285 static int
rts_sockaddr(struct socket * so,struct sockaddr ** nam)286 rts_sockaddr(struct socket *so, struct sockaddr **nam)
287 {
288 	return raw_usrreqs.pru_sockaddr(so, nam);
289 }
290 
/*
 * User-request dispatch table for PF_ROUTE sockets.  Operations not
 * listed here (accept, connect2, control, listen, rcvd, rcvoob, sense)
 * are unsupported, per the comments on the rts_* handlers above.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort =            rts_abort,
	.pru_attach =           rts_attach,
	.pru_bind =             rts_bind,
	.pru_connect =          rts_connect,
	.pru_detach =           rts_detach,
	.pru_disconnect =       rts_disconnect,
	.pru_peeraddr =         rts_peeraddr,
	.pru_send =             rts_send,
	.pru_shutdown =         rts_shutdown,
	.pru_sockaddr =         rts_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
305 
/*
 * View a raw routing-message buffer as a struct rt_msghdr.  The cast is
 * centralized here so the -Wcast-align suppression lives in exactly one
 * place.  NOTE(review): callers are expected to pass a buffer that is
 * large enough and suitably aligned for rt_msghdr access.
 */
static struct rt_msghdr *
__attribute__((always_inline))
__stateful_pure
_rtm_hdr(caddr_t rtm_data __header_indexable)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"
	return (struct rt_msghdr*)rtm_data;
#pragma clang diagnostic pop
}
316 
/*
 * Process one routing message (RTM_ADD / RTM_DELETE / RTM_GET /
 * RTM_CHANGE / RTM_LOCK) received on a PF_ROUTE socket, then echo the
 * (possibly rewritten) message back to the requestor and/or other
 * routing-socket listeners via raw_input().
 *
 * Called with the socket locked; the socket lock is dropped while
 * rnh_lock is held for the routing-table manipulation, and retaken
 * before the reply is delivered.  Returns 0 or an errno; the same
 * errno is also reflected to listeners in rtm_errno.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
	size_t rtm_len = 0;
	caddr_t rtm_buf __counted_by(rtm_len) = NULL;
	caddr_t rtm_tmpbuf;
#define RTM _rtm_hdr(rtm_buf)
	rtentry_ref_t rt = NULL;
	rtentry_ref_t saved_nrt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	struct sockaddr    tiny_sa_storage[RTAX_MAX];
	int len, error = 0;
	sa_family_t dst_sa_family = 0;
	struct ifnet *ifp = NULL;
	struct sockaddr_in dst_in, gate_in;
	int sendonlytoself = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct rawcb *rp = NULL;
	boolean_t is_router = FALSE;
/* record the error and jump to the common reply path */
#define senderr(e) { error = (e); goto flush; }
	if (m == NULL || ((m->m_len < sizeof(intptr_t)) &&
	    (m = m_pullup(m, sizeof(intptr_t))) == NULL)) {
		return ENOBUFS;
	}
	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * Unlock the socket (but keep a reference) it won't be
	 * accessed until raw_input appends to it.
	 */
	socket_unlock(so, 0);
	lck_mtx_lock(rnh_lock);

	len = m->m_pkthdr.len;
	if (len < sizeof(*RTM) ||
	    len != mtod(m, struct rt_msghdr_prelude *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	/*
	 * Allocate the buffer for the message. First we allocate
	 * a temporary buffer, and if successful, set the pointers.
	 */
	rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
	if (rtm_tmpbuf == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	rtm_len = (size_t)len;
	rtm_buf = rtm_tmpbuf;
	rtm_tmpbuf = NULL;


	m_copydata(m, 0, len, rtm_buf);

	if (RTM->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}

	/*
	 * Silent version of RTM_GET for Reachability APIs. We may change
	 * all RTM_GETs to be silent in the future, so this is private for now.
	 */
	if (RTM->rtm_type == RTM_GET_SILENT) {
		if (!(so->so_options & SO_USELOOPBACK)) {
			senderr(EINVAL);
		}
		sendonlytoself = 1;
		RTM->rtm_type = RTM_GET;
	}

	/*
	 * Perform permission checking, only privileged sockets
	 * may perform operations other than RTM_GET
	 */
	if (RTM->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPERM);
	}

	RTM->rtm_pid = proc_selfpid();
	info.rti_addrs = RTM->rtm_addrs;

	/* parse the sockaddrs that follow the header into info.rti_info[] */
	if (rt_xaddrs(rtm_buf + sizeof(struct rt_msghdr), rtm_buf + rtm_len, &info, tiny_sa_storage)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) {
		senderr(EINVAL);
	}

	/*
	 * Normalize a short/odd-length AF_INET destination into a
	 * well-formed sockaddr_in copy on the stack.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
	    info.rti_info[RTAX_DST]->sa_len != sizeof(struct sockaddr_in)) {
		/* At minimum, we need up to sin_addr */
		if (info.rti_info[RTAX_DST]->sa_len <
		    offsetof(struct sockaddr_in, sin_zero)) {
			senderr(EINVAL);
		}

		SOCKADDR_ZERO(&dst_in, sizeof(dst_in));
		dst_in.sin_len = sizeof(dst_in);
		dst_in.sin_family = AF_INET;
		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
		info.rti_info[RTAX_DST] = SA(&dst_in);
		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
	} else if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    info.rti_info[RTAX_DST]->sa_len < sizeof(struct sockaddr_in6)) {
		senderr(EINVAL);
	}

	/* same normalization for the gateway, when one was supplied */
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
		    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof(struct sockaddr_in)) {
			/* At minimum, we need up to sin_addr */
			if (info.rti_info[RTAX_GATEWAY]->sa_len <
			    offsetof(struct sockaddr_in, sin_zero)) {
				senderr(EINVAL);
			}

			SOCKADDR_ZERO(&gate_in, sizeof(gate_in));
			gate_in.sin_len = sizeof(gate_in);
			gate_in.sin_family = AF_INET;
			gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
			gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
			info.rti_info[RTAX_GATEWAY] = SA(&gate_in);
		} else if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET6 &&
		    info.rti_info[RTAX_GATEWAY]->sa_len < sizeof(struct sockaddr_in6)) {
			senderr(EINVAL);
		}
	}

	/*
	 * Replace the caller's genmask with the canonical, interned copy
	 * from the mask radix tree so it can be stored in the rtentry.
	 */
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		struct sockaddr *genmask = SA(info.rti_info[RTAX_GENMASK]);
		void *genmask_bytes = __SA_UTILS_CONV_TO_BYTES(genmask);
		t = rn_addmask(genmask_bytes, 0, 1);
		if (t != NULL && SOCKADDR_CMP(genmask, rn_get_key(t), genmask->sa_len) == 0) {
			info.rti_info[RTAX_GENMASK] = SA(rn_get_key(t));
		} else {
			senderr(ENOBUFS);
		}
	}

	/*
	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
	 */
	if (RTM->rtm_flags & RTF_IFSCOPE) {
		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
		    info.rti_info[RTAX_DST]->sa_family != AF_INET6) {
			senderr(EINVAL);
		}
		ifscope = RTM->rtm_index;
	}
	/*
	 * Block changes on INTCOPROC interfaces.
	 */
	if (ifscope != IFSCOPE_NONE) {
		unsigned int intcoproc_scope = 0;
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (IFNET_IS_INTCOPROC(ifp)) {
				intcoproc_scope = ifp->if_index;
				break;
			}
		}
		ifnet_head_done();
		if (intcoproc_scope == ifscope && proc_getpid(current_proc()) != 0) {
			senderr(EINVAL);
		}
	}
	/*
	 * Require entitlement to change management interfaces
	 */
	if (management_control_unrestricted == false && if_management_interface_check_needed == true &&
	    ifscope != IFSCOPE_NONE && proc_getpid(current_proc()) != 0) {
		bool is_management = false;

		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(ifscope)) {
			ifp = ifindex2ifnet[ifscope];
			if (ifp != NULL && IFNET_IS_MANAGEMENT(ifp)) {
				is_management = true;
			}
		}
		ifnet_head_done();

		if (is_management && !IOCurrentTaskHasEntitlement(MANAGEMENT_CONTROL_ENTITLEMENT)) {
			senderr(EINVAL);
		}
	}

	/*
	 * RTF_PROXY can only be set internally from within the kernel.
	 */
	if (RTM->rtm_flags & RTF_PROXY) {
		senderr(EINVAL);
	}

	/*
	 * For AF_INET, always zero out the embedded scope ID.  If this is
	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
	 * flag and the corresponding rtm_index value.  This is to prevent
	 * false interpretation of the scope ID because it's using the sin_zero
	 * field, which might not be properly cleared by the requestor.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
	}
	/*
	 * For scoped AF_INET6 destinations, lift a KAME-style embedded
	 * scope out of the address into sin6_scope_id.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    SIN6(info.rti_info[RTAX_DST])->sin6_scope_id == 0) {
		SIN6(info.rti_info[RTAX_DST])->sin6_scope_id = ntohs(SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1]);
		SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1] = 0;
	}

	switch (RTM->rtm_type) {
	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL) {
			senderr(EINVAL);
		}

		error = rtrequest_scoped_locked(RTM_ADD,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0 && saved_nrt != NULL) {
			RT_LOCK(saved_nrt);
			/*
			 * If the route request specified an interface with
			 * IFA and/or IFP, we set the requested interface on
			 * the route with rt_setif.  It would be much better
			 * to do this inside rtrequest, but that would
			 * require passing the desired interface, in some
			 * form, to rtrequest.  Since rtrequest is called in
			 * so many places (roughly 40 in our source), adding
			 * a parameter is to much for us to swallow; this is
			 * something for the FreeBSD developers to tackle.
			 * Instead, we let rtrequest compute whatever
			 * interface it wants, then come in behind it and
			 * stick in the interface that we really want.  This
			 * works reasonably well except when rtrequest can't
			 * figure out what interface to use (with
			 * ifa_withroute) and returns ENETUNREACH.  Ideally
			 * it shouldn't matter if rtrequest can't figure out
			 * the interface if we're going to explicitly set it
			 * ourselves anyway.  But practically we can't
			 * recover here because rtrequest will not do any of
			 * the work necessary to add the route if it can't
			 * find an interface.  As long as there is a default
			 * route that leads to some interface, rtrequest will
			 * find an interface, so this problem should be
			 * rarely encountered.
			 * [email protected]
			 */
			rt_setif(saved_nrt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);
			(void)rt_setmetrics(RTM->rtm_inits, &RTM->rtm_rmx, saved_nrt);
			saved_nrt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			saved_nrt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			RT_REMREF_LOCKED(saved_nrt);
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		error = rtrequest_scoped_locked(RTM_DELETE,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0) {
			rt = saved_nrt;
			RT_LOCK(rt);
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL) {
			senderr(EAFNOSUPPORT);
		}
		/*
		 * Lookup the best match based on the key-mask pair;
		 * callee adds a reference and checks for root node.
		 */
		rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
		    info.rti_info[RTAX_NETMASK], rnh, ifscope);
		if (rt == NULL) {
			senderr(ESRCH);
		}
		RT_LOCK(rt);

		/*
		 * Holding rnh_lock here prevents the possibility of
		 * ifa from changing (e.g. in_ifinit), so it is safe
		 * to access its ifa_addr (down below) without locking.
		 */
		switch (RTM->rtm_type) {
		case RTM_GET: {
			kauth_cred_t cred __single;
			kauth_cred_t* credp;
			struct ifaddr *ifa2;
			/*
			 * The code below serves both the `RTM_GET'
			 * and the `RTM_DELETE' requests.
			 */
report:
			cred = current_cached_proc_cred(PROC_NULL);
			credp = &cred;

			ifa2 = NULL;
			RT_LOCK_ASSERT_HELD(rt);
			info.rti_info[RTAX_DST] = rt_key(rt);
			dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (RTM->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp != NULL) {
					ifnet_lock_shared(ifp);
					ifa2 = ifp->if_lladdr;
					info.rti_info[RTAX_IFP] = ifa2->ifa_addr;
					ifa_addref(ifa2);
					ifnet_lock_done(ifp);
					info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
					RTM->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				RTM->rtm_index = ifp->if_index;
			}

			/*
			 * Determine the length required for the routing information
			 * report.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			len = rt_msg2(RTM->rtm_type, &info, NULL, NULL, credp);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Allocate output message for the routing information report.
			 */
			VERIFY(rtm_tmpbuf == NULL);
			rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
			if (rtm_tmpbuf == NULL) {
				RT_UNLOCK(rt);
				if (ifa2 != NULL) {
					ifa_remref(ifa2);
				}
				senderr(ENOBUFS);
			}

			/*
			 * Create the header for the output message, based
			 * on the request message header and the current routing information.
			 */
			struct rt_msghdr *out_rtm = _rtm_hdr(rtm_tmpbuf);
			bcopy(RTM, out_rtm, sizeof(struct rt_msghdr));
			out_rtm->rtm_flags = rt->rt_flags;
			rt_getmetrics(rt, &out_rtm->rtm_rmx);
			out_rtm->rtm_addrs = info.rti_addrs;

			/*
			 * Populate the body of the output message.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			(void) rt_msg2(out_rtm->rtm_type, &info, rtm_tmpbuf,
			    NULL, &cred);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Replace the "main" routing message with the output message
			 * we have constructed.
			 */
			kfree_data_counted_by(rtm_buf, rtm_len);
			rtm_len = len;
			rtm_buf = rtm_tmpbuf;
			rtm_tmpbuf = NULL;

			if (ifa2 != NULL) {
				ifa_remref(ifa2);
			}

			break;
		}

		case RTM_CHANGE:
			is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;

			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
			    info.rti_info[RTAX_GATEWAY]))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			/*
			 * If they tried to change things but didn't specify
			 * the required gateway, then just use the old one.
			 * This can happen if the user tries to change the
			 * flags on the default route without changing the
			 * default gateway. Changing flags still doesn't work.
			 */
			if ((rt->rt_flags & RTF_GATEWAY) &&
			    info.rti_info[RTAX_GATEWAY] == NULL) {
				info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			}

			/*
			 * On Darwin, we call rt_setif which contains the
			 * equivalent to the code found at this very spot
			 * in BSD.
			 */
			rt_setif(rt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);

			if ((error = rt_setmetrics(RTM->rtm_inits,
			    &RTM->rtm_rmx, rt))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			if (info.rti_info[RTAX_GENMASK]) {
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			}

			/*
			 * Enqueue work item to invoke callback for this route entry
			 * This may not be needed always, but for now issue it anytime
			 * RTM_CHANGE gets called.
			 */
			route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, TRUE);
			/*
			 * If the route is for a router, walk the tree to send refresh
			 * event to protocol cloned entries
			 */
			if (is_router) {
				struct route_event rt_ev;
				route_event_init(&rt_ev, rt, NULL, ROUTE_ENTRY_REFRESH);
				RT_UNLOCK(rt);
				(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
				RT_LOCK(rt);
			}
			OS_FALLTHROUGH;
		case RTM_LOCK:
			rt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			break;
		}
		RT_UNLOCK(rt);
		break;
	default:
		senderr(EOPNOTSUPP);
	}
flush:
	/* mark the message with the outcome before echoing it to listeners */
	if (RTM != NULL) {
		if (error) {
			RTM->rtm_errno = error;
		} else {
			RTM->rtm_flags |= RTF_DONE;
		}
	}
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		rtfree_locked(rt);
	}
	lck_mtx_unlock(rnh_lock);

	/* relock the socket now */
	socket_lock(so, 0);
	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			kfree_data_counted_by(rtm_buf, rtm_len);
			m_freem(m);
			return error;
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	/* copy the (possibly rewritten) message back into the mbuf chain */
	if (rtm_buf != NULL) {
		m_copyback(m, 0, RTM->rtm_msglen, rtm_buf);
		if (m->m_pkthdr.len < RTM->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > RTM->rtm_msglen) {
			m_adj(m, RTM->rtm_msglen - m->m_pkthdr.len);
		}
		kfree_data_counted_by(rtm_buf, rtm_len);
	}
	if (sendonlytoself && m != NULL) {
		error = 0;
		if (sbappendaddr(&so->so_rcv, &route_src, m,
		    NULL, &error) != 0) {
			sorwakeup(so);
		}
		if (error) {
			return error;
		}
	} else {
		struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
		if (rp != NULL) {
			rp->rcb_proto.sp_family = 0; /* Avoid us */
		}
		if (dst_sa_family != 0) {
			route_proto.sp_protocol = dst_sa_family;
		}
		if (m != NULL) {
			socket_unlock(so, 0);
			raw_input(m, &route_proto, &route_src, &route_dst);
			socket_lock(so, 0);
		}
		if (rp != NULL) {
			rp->rcb_proto.sp_family = PF_ROUTE;
		}
	}
	return error;
#undef RTM /* was defined to _rtm_hdr(rtm_buf) */
}
871 
872 void
rt_setexpire(struct rtentry * rt,uint64_t expiry)873 rt_setexpire(struct rtentry *rt, uint64_t expiry)
874 {
875 	/* set both rt_expire and rmx_expire */
876 	rt->rt_expire = expiry;
877 	if (expiry) {
878 		rt->rt_rmx.rmx_expire =
879 		    (int32_t)(expiry + rt->base_calendartime -
880 		    rt->base_uptime);
881 	} else {
882 		rt->rt_rmx.rmx_expire = 0;
883 	}
884 }
885 
/*
 * Apply user-supplied route metrics to a route entry.
 *
 * `which' is a bitmask of RTV_* flags selecting which fields of `in'
 * are copied into `out->rt_rmx'.  RTV_REFRESH_HOST is special: when set
 * it must be the ONLY bit set, and instead of copying metrics we ask the
 * link layer to refresh its information for the (non-static, RTF_LLINFO)
 * route.
 *
 * Returns 0 on success, EINVAL on inconsistent flags/route state, or
 * ENOTSUP if a refresh was requested but the route has no refresh hook.
 */
static int
rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
{
	if (!(which & RTV_REFRESH_HOST)) {
		struct timeval caltime;
		/*
		 * NOTE(review): caltime is read again right before use in
		 * the rmx_expire branch below; this initial read appears
		 * redundant — confirm before removing.
		 */
		getmicrotime(&caltime);
		/* Copy metric field `e' from `in' only when flag `f' was requested. */
#define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
		metric(RTV_RPIPE, rmx_recvpipe);
		metric(RTV_SPIPE, rmx_sendpipe);
		metric(RTV_SSTHRESH, rmx_ssthresh);
		metric(RTV_RTT, rmx_rtt);
		metric(RTV_RTTVAR, rmx_rttvar);
		metric(RTV_HOPCOUNT, rmx_hopcount);
		metric(RTV_MTU, rmx_mtu);
		metric(RTV_EXPIRE, rmx_expire);
#undef metric
		if (out->rt_rmx.rmx_expire > 0) {
			/* account for system time change */
			getmicrotime(&caltime);
			out->base_calendartime +=
			    NET_CALCULATE_CLOCKSKEW(caltime,
			    out->base_calendartime,
			    net_uptime(), out->base_uptime);
			/*
			 * rmx_expire is calendar time; convert it back to an
			 * uptime-relative value before storing through
			 * rt_setexpire(), which maintains both fields.
			 */
			rt_setexpire(out,
			    out->rt_rmx.rmx_expire -
			    out->base_calendartime +
			    out->base_uptime);
		} else {
			rt_setexpire(out, 0);
		}

		/* Both expiry representations must agree on zero vs. non-zero. */
		VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
		VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
	} else {
		/* Only RTV_REFRESH_HOST must be set */
		if ((which & ~RTV_REFRESH_HOST) ||
		    (out->rt_flags & RTF_STATIC) ||
		    !(out->rt_flags & RTF_LLINFO)) {
			return EINVAL;
		}

		if (out->rt_llinfo_refresh == NULL) {
			return ENOTSUP;
		}

		out->rt_llinfo_refresh(out);
	}
	return 0;
}
935 
/*
 * Export a route's metrics for reporting to userland.
 *
 * Copies `in->rt_rmx' to `out' and rewrites out->rmx_expire as calendar
 * time, compensating for any system clock change since the expiry was
 * recorded.  Note this updates `in->base_calendartime' as a side effect
 * to absorb the measured clock skew.
 */
static void
rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
{
	struct timeval caltime;

	/* Both expiry representations must agree on zero vs. non-zero. */
	VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
	VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);

	*out = in->rt_rmx;

	if (in->rt_expire != 0) {
		/* account for system time change */
		getmicrotime(&caltime);

		in->base_calendartime +=
		    NET_CALCULATE_CLOCKSKEW(caltime,
		    in->base_calendartime, net_uptime(), in->base_uptime);

		/* uptime-relative rt_expire -> calendar-time rmx_expire */
		out->rmx_expire = (int32_t)(in->base_calendartime +
		    in->rt_expire - in->base_uptime);
	} else {
		out->rmx_expire = 0;
	}
}
960 
961 /*
962  * Set route's interface given info.rti_info[RTAX_IFP],
963  * info.rti_info[RTAX_IFA], and gateway.
964  */
static void
rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
    struct sockaddr *Gate, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct ifnet *ifp = NULL;
	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

	/* Caller must hold rnh_lock and the route's lock. */
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK_ASSERT_HELD(rt);

	/* Don't update a defunct route */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return;
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);

	/*
	 * New gateway could require new ifaddr, ifp; flags may also
	 * be different; ifp may be specified by ll sockaddr when
	 * protocol address is ambiguous.
	 */
	if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
	    (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
		/* Re-resolve the ifaddr on that ifp against Ifaaddr/Gate. */
		ifa_remref(ifa);
		ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
	} else {
		/* Drop the reference picked up by the failed lookup above. */
		if (ifa != NULL) {
			ifa_remref(ifa);
			ifa = NULL;
		}
		if (Ifpaddr && (ifp = if_withname(Ifpaddr))) {
			if (Gate) {
				ifa = ifaof_ifpforaddr(Gate, ifp);
			} else {
				/* No gateway: fall back to the ifp's first address. */
				ifnet_lock_shared(ifp);
				ifa = TAILQ_FIRST(&ifp->if_addrhead);
				if (ifa != NULL) {
					ifa_addref(ifa);
				}
				ifnet_lock_done(ifp);
			}
		} else if (Ifaaddr &&
		    (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
			ifp = ifa->ifa_ifp;
		} else if (Gate != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, since holding
			 * rnh_lock here prevents another thread from calling
			 * rt_setgate() on this route.  We cannot hold the
			 * lock across ifa_ifwithroute since the lookup done
			 * by that routine may point to the same route.
			 */
			RT_UNLOCK(rt);
			if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
			    rt_key(rt), Gate, ifscope)) != NULL) {
				ifp = ifa->ifa_ifp;
			}
			RT_LOCK(rt);
			/* Don't update a defunct route */
			if (rt->rt_flags & RTF_CONDEMNED) {
				if (ifa != NULL) {
					ifa_remref(ifa);
				}
				/* Release extra ref */
				RT_REMREF_LOCKED(rt);
				return;
			}
		}
	}

	/* trigger route cache reevaluation */
	if (rt_key(rt)->sa_family == AF_INET) {
		routegenid_inet_update();
	} else if (rt_key(rt)->sa_family == AF_INET6) {
		routegenid_inet6_update();
	}

	if (ifa != NULL) {
		struct ifaddr *oifa = rt->rt_ifa;
		if (oifa != ifa) {
			/*
			 * Switching ifaddr: notify the old ifaddr with an
			 * RTM_DELETE request before installing the new one.
			 */
			if (oifa != NULL) {
				IFA_LOCK_SPIN(oifa);
				ifa_rtrequest = oifa->ifa_rtrequest;
				IFA_UNLOCK(oifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_DELETE, rt, Gate);
				}
			}
			rtsetifa(rt, ifa);

			if (rt->rt_ifp != ifp) {
				/*
				 * Purge any link-layer info caching.
				 */
				if (rt->rt_llinfo_purge != NULL) {
					rt->rt_llinfo_purge(rt);
				}

				/*
				 * Adjust route ref count for the interfaces.
				 */
				if (rt->rt_if_ref_fn != NULL) {
					rt->rt_if_ref_fn(ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
			}
			rt->rt_ifp = ifp;
			/*
			 * If this is the (non-scoped) default route, record
			 * the interface index used for the primary ifscope.
			 */
			if (rt_primary_default(rt, rt_key(rt))) {
				set_primary_ifscope(rt_key(rt)->sa_family,
				    rt->rt_ifp->if_index);
			}
			/*
			 * If rmx_mtu is not locked, update it
			 * to the MTU used by the new interface.
			 */
			if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
				rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
				if (rt_key(rt)->sa_family == AF_INET &&
				    INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
					rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
					/* Further adjust the size for CLAT46 expansion */
					rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
				}
			}

			/* Notify the newly-installed ifaddr of the change. */
			if (rt->rt_ifa != NULL) {
				IFA_LOCK_SPIN(rt->rt_ifa);
				ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
				IFA_UNLOCK(rt->rt_ifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_ADD, rt, Gate);
				}
			}
			ifa_remref(ifa);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return;
		}
		/* Same ifaddr as before; just drop the lookup reference. */
		ifa_remref(ifa);
		ifa = NULL;
	}

	/* XXX: to reset gateway to correct value, at RTM_CHANGE */
	if (rt->rt_ifa != NULL) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
		IFA_UNLOCK(rt->rt_ifa);
		if (ifa_rtrequest != NULL) {
			ifa_rtrequest(RTM_ADD, rt, Gate);
		}
	}

	/*
	 * Workaround for local address routes pointing to the loopback
	 * interface added by configd, until <rdar://problem/12970142>.
	 */
	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
	    (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) {
		ifa = ifa_ifwithaddr(rt_key(rt));
		if (ifa != NULL) {
			if (ifa != rt->rt_ifa) {
				rtsetifa(rt, ifa);
			}
			ifa_remref(ifa);
		}
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
}
1146 
1147 /*
1148  * Extract the addresses of the passed sockaddrs.
1149  *
1150  * Do a little sanity checking so as to avoid bad memory references.
1151  * This data is derived straight from userland. Some of the data
1152  * anomalies are unrecoverable; for others we substitute the anomalous
1153  * user data with a sanitized replacement.
1154  *
1155  * Details on the input anomalies:
1156  *
1157  * 1. Unrecoverable input anomalies (retcode == EINVAL)
1158  *    The function returns EINVAL.
1159  *    1.1. Truncated sockaddrs at the end of the user-provided buffer.
1160  *    1.2. Unparseable sockaddr header (`0 < .sa_len && .sa_len < 2').
1161  *    1.3. Sockaddrs that won't fit `struct sockaddr_storage'.
1162  *
1163  * 2. Recoverable input anomalies (retcode == 0):
1164  *    The below anomalies would lead to a malformed `struct sockaddr *'
1165  *    pointers. Any attempt to pass such malformed pointers to a function
1166  *    or to assign those to another variable will cause a trap
1167  *    when the `-fbounds-safety' feature is enabled.
1168  *
1169  *    To mitigate the malformed pointers problem, we substitute the malformed
1170  *    user data with a well-formed sockaddrs.
1171  *
 *    2.1. Sockaddrs with `.sa_len == 0' (aka "zero-length" sockaddrs).
1173  *         We substitute those with a pointer to the `sa_data' global
1174  *         variable.
1175  *    2.2. Sockaddrs with `.sa_len < 16' (a.k.a. "tiny" sockaddrs).
1176  *         We copy the contents of "tiny" sockaddrs to a location
1177  *         inside the `xtra_storage' parameter, and substitute
1178  *         the pointer into the user-provided data with the location
1179  *         in `xtra_storage'.
1180  */
static int
rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX])
{
	struct sockaddr *sa;
	int i, next_tiny_sa = 0;

	/* Start from a clean slate: zero the scratch storage and result array. */
	for (i = 0; i < RTAX_MAX; i++) {
		SOCKADDR_ZERO(&xtra_storage[i], sizeof(struct sockaddr));
	}
	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));

	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		/* Skip slots the caller did not claim to have provided. */
		if ((rtinfo->rti_addrs & (1 << i)) == 0) {
			continue;
		}

		/*
		 * We expect the memory pointed to by `cp' to contain a valid socket address.
		 * However, there are no guarantees that our expectations are correct,
		 * since the buffer is passed from the user-space.
		 * In particular, the socket address may be corrupted or truncated.
		 * If we attempt to interpret the contents of the memory pointed to by `cp'
		 * as a valid socket address, we may end up in a situation where the end
		 * of the presumed socket address exceeds the end of the input buffer:
		 *
		 * +-------------------------------+
		 * | user buffer                   |
		 * +-------------------------------+
		 *                       cp ^ cplim ^
		 *                          +-----------------------+
		 *                          | (struct sockaddr *)cp |
		 *                          +-----------------------+
		 *
		 * In such case, we are likely to panic with the `-fbounds-safety' trap,
		 * while the desired behavior is to return `ENOENT'.
		 *
		 * Because of the above concern, we can not optimistically cast the pointer
		 * `cp' to `struct sockaddr*' until we have validated that the contents
		 * of the memory can be safely interpreted as a socket address.
		 *
		 * Instead, we start by examining the expected length of the socket address,
		 * which is guaranteed to be located at the first byte, and perform several
		 * sanity checks, before interpreting the memory as a valid socket address.
		 */
		uint8_t next_sa_len = *cp;

		/*
		 * Is the user-provided sockaddr truncated?
		 */
		if ((cp + next_sa_len) > cplim) {
			return EINVAL;
		}

		/*
		 * Will the user-provided sockaddr fit the sockaddr storage?
		 */
		if (next_sa_len > sizeof(struct sockaddr_storage)) {
			return EINVAL;
		}

		/*
		 * there are no more.. quit now
		 * If there are more bits, they are in error.
		 * I've seen this. route(1) can evidently generate these.
		 * This causes kernel to core dump.
		 * for compatibility, If we see this, point to a safe address.
		 */
		if (next_sa_len == 0) {
			rtinfo->rti_info[i] = &sa_zero;
			return 0; /* should be EINVAL but for compat */
		}

		/*
		 * Check for the minimal length.
		 */
		if (next_sa_len < offsetof(struct sockaddr, sa_data)) {
			return EINVAL;
		}

		/*
		 * Check whether we are looking at a "tiny" sockaddr,
		 * and if so, copy the contents to the xtra storage.
		 * See the comment to this function for the details
		 * on "tiny" sockaddrs and the xtra storage.
		 */
		if (next_sa_len < sizeof(struct sockaddr)) {
			sa = &xtra_storage[next_tiny_sa++];
			SOCKADDR_COPY(cp, sa, next_sa_len);
		} else {
			sa = SA(cp);
		}

		/*
		 * From this point on we can safely use `sa'.
		 */

		/* accept it */
		rtinfo->rti_info[i] = sa;
		const uint32_t rounded_sa_len = ROUNDUP32(sa->sa_len);
		if (cp + rounded_sa_len > cplim) {
			/* Rounded length would pass the end; stop parsing. */
			break;
		} else {
			cp += rounded_sa_len;
			/*
			 * NOTE(review): the self-assignment appears to keep the
			 * __ended_by(cp, cplim) pair updated together for
			 * -fbounds-safety; confirm before removing.
			 */
			cplim = cplim;
		}
	}
	return 0;
}
1289 
/*
 * Build a routing-socket message of the given type into a freshly
 * allocated mbuf: a fixed-size header (chosen by `type') followed by
 * the 32-bit-aligned sockaddrs present in `rtinfo'.  Sets the
 * corresponding bits in rtinfo->rti_addrs as addresses are appended.
 *
 * Returns the mbuf on success, or NULL if allocation fails or the
 * resulting message does not fit the mbuf.
 */
static struct mbuf *
rt_msg1(u_char type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr_common *rtmh;
	int32_t *rtm_buf; /* int32 to preserve the alignment. */
	struct mbuf *m;
	int i;
	int len, dlen, off;

	/* Header size depends on the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Get a packet header mbuf; add a cluster if the header won't fit. */
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL) {
		return NULL;
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	rtm_buf = mtod(m, int32_t *);
	bzero(rtm_buf, len);
	off = len;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;

		default:
			break;
		}

		/* Append the sockaddr at a 32-bit-aligned offset. */
		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		m_copyback(m, off, dlen, __SA_UTILS_CONV_TO_BYTES(sa));
		len = off + dlen;
		off += ROUNDUP32(dlen);
	}
	/* m_copyback() failed to extend the mbuf to the expected length. */
	if (m->m_pkthdr.len != len) {
		m_freem(m);
		return NULL;
	}
	rtmh = (struct rt_msghdr_common *)rtm_buf;
	rtmh->rtm_msglen = (u_short)len;
	rtmh->rtm_version = RTM_VERSION;
	rtmh->rtm_type = type;
	return m;
}
1378 
/*
 * Lay out a routing-socket message (header + aligned sockaddrs from
 * `rtinfo') into the buffer `cp', or just compute its length when `cp'
 * is NULL.  When `cp' is NULL and a walkarg `w' is supplied, a second
 * pass writes the message into w->w_tmem (grown on demand).  `credp'
 * is passed to rtm_scrub() to decide whether gateway/ifp addresses are
 * sanitized for the requesting process.
 *
 * Returns the total message length in bytes.
 */
static int
rt_msg2(u_char type, struct rt_addrinfo *rtinfo, caddr_t cp __header_indexable, struct walkarg *w,
    kauth_cred_t* credp)
{
	int i;
	int len, dlen, rlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	/* Header size depends on the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	case RTM_IFINFO2:
		len = sizeof(struct if_msghdr2);
		break;

	case RTM_NEWMADDR2:
		len = sizeof(struct ifma_msghdr2);
		break;

	case RTM_GET_EXT:
		len = sizeof(struct rt_msghdr_ext);
		break;

	case RTM_GET2:
		len = sizeof(struct rt_msghdr2);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Remember the header location; addresses are placed after it. */
	cp0 = cp;
	if (cp0) {
		cp += len;
	}
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;
		case RTAX_GATEWAY:
		case RTAX_IFP:
			sa = rtm_scrub(type, i, NULL, sa, &ssbuf,
			    sizeof(ssbuf), credp);
			break;

		default:
			break;
		}

		/* Copy the sockaddr, zero-padding up to the 32-bit boundary. */
		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		rlen = ROUNDUP32(dlen);
		if (cp) {
			SOCKADDR_COPY(sa, cp, dlen);
			if (dlen != rlen) {
				bzero(cp + dlen, rlen - dlen);
			}
			cp += rlen;
		}
		len += rlen;
	}
	/*
	 * First pass only computed `len'; if a walkarg with a sysctl
	 * request is present, (re)allocate its temporary buffer and go
	 * around once more to actually fill it in.
	 */
	if (cp == NULL && w != NULL && !second_time) {
		walkarg_ref_t rw = w;

		if (rw->w_req != NULL) {
			if (rw->w_tmemsize < len) {
				if (rw->w_tmem != NULL) {
					kfree_data_sized_by(rw->w_tmem, rw->w_tmemsize);
				}
				caddr_t new_tmem = (caddr_t)kalloc_data(len, Z_ZERO | Z_WAITOK);
				if (new_tmem != NULL) {
					rw->w_tmemsize = len;
					rw->w_tmem = new_tmem;
				}
			}
			if (rw->w_tmem != NULL) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			}
		}
	}
	/* Finalize the common header fields when we actually wrote a message. */
	if (cp) {
		struct rt_msghdr_common *rtmh = (struct rt_msghdr_common *)(void *)cp0;

		rtmh->rtm_version = RTM_VERSION;
		rtmh->rtm_type = type;
		rtmh->rtm_msglen = (u_short)len;
	}
	return len;
}
1503 
1504 /*
1505  * This routine is called to generate a message from the routing
1506  * socket indicating that a redirect has occurred, a routing lookup
1507  * has failed, or that a protocol has detected timeouts to a particular
1508  * destination.
1509  */
1510 void
rt_missmsg(u_char type,struct rt_addrinfo * rtinfo,int flags,int error)1511 rt_missmsg(u_char type, struct rt_addrinfo *rtinfo, int flags, int error)
1512 {
1513 	struct rt_msghdr_common *rtmh;
1514 	struct mbuf *m;
1515 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1516 	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1517 
1518 	if (route_cb.any_count == 0) {
1519 		return;
1520 	}
1521 	m = rt_msg1(type, rtinfo);
1522 	if (m == NULL) {
1523 		return;
1524 	}
1525 	rtmh = mtod(m, struct rt_msghdr_common *);
1526 	rtmh->rtm_flags = RTF_DONE | flags;
1527 	rtmh->rtm_errno = error;
1528 	rtmh->rtm_addrs = rtinfo->rti_addrs;
1529 	route_proto.sp_family = sa ? sa->sa_family : 0;
1530 	raw_input(m, &route_proto, &route_src, &route_dst);
1531 }
1532 
1533 /*
1534  * This routine is called to generate a message from the routing
1535  * socket indicating that the status of a network interface has changed.
1536  */
1537 void
rt_ifmsg(struct ifnet * ifp)1538 rt_ifmsg(struct ifnet *ifp)
1539 {
1540 	struct if_msghdr *ifm;
1541 	struct mbuf *m;
1542 	struct rt_addrinfo info;
1543 	struct  sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1544 
1545 	if (route_cb.any_count == 0) {
1546 		return;
1547 	}
1548 	bzero((caddr_t)&info, sizeof(info));
1549 	m = rt_msg1(RTM_IFINFO, &info);
1550 	if (m == NULL) {
1551 		return;
1552 	}
1553 	ifm = mtod(m, struct if_msghdr *);
1554 	ifm->ifm_index = ifp->if_index;
1555 	ifm->ifm_flags = (u_short)ifp->if_flags;
1556 	if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
1557 	ifm->ifm_addrs = 0;
1558 	raw_input(m, &route_proto, &route_src, &route_dst);
1559 }
1560 
1561 /*
1562  * This is called to generate messages from the routing socket
1563  * indicating a network interface has had addresses associated with it.
1564  * if we ever reverse the logic and replace messages TO the routing
1565  * socket indicate a request to configure interfaces, then it will
1566  * be unnecessary as the routing socket will automatically generate
1567  * copies of it.
1568  *
1569  * Since this is coming from the interface, it is expected that the
1570  * interface will be locked.  Caller must hold rnh_lock and rt_lock.
1571  */
void
rt_newaddrmsg(u_char cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{
	struct rt_addrinfo info;
	struct sockaddr *sa = 0;
	int pass;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifa->ifa_ifp;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* Caller must hold rnh_lock and the route's lock. */
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (route_cb.any_count == 0) {
		return;
	}

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);
	/*
	 * Two messages are emitted per call: an address message
	 * (RTM_NEWADDR/RTM_DELADDR) and a route message (`cmd').
	 * For RTM_ADD the address message is sent first (pass 1);
	 * for RTM_DELETE the route message is sent first.
	 */
	for (pass = 1; pass < 3; pass++) {
		bzero((caddr_t)&info, sizeof(info));
		if ((cmd == RTM_ADD && pass == 1) ||
		    (cmd == RTM_DELETE && pass == 2)) {
			struct ifa_msghdr *ifam;
			u_char ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

			/* Lock ifp for if_lladdr */
			ifnet_lock_shared(ifp);
			IFA_LOCK(ifa);
			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold its
			 * lock.  The link address is always present; it's
			 * never freed.
			 */
			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			if ((m = rt_msg1(ncmd, &info)) == NULL) {
				IFA_UNLOCK(ifa);
				ifnet_lock_done(ifp);
				continue;
			}
			IFA_UNLOCK(ifa);
			ifnet_lock_done(ifp);
			ifam = mtod(m, struct ifa_msghdr *);
			ifam->ifam_index = ifp->if_index;
			IFA_LOCK_SPIN(ifa);
			ifam->ifam_metric = ifa->ifa_metric;
			ifam->ifam_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			ifam->ifam_addrs = info.rti_addrs;
		}
		if ((cmd == RTM_ADD && pass == 2) ||
		    (cmd == RTM_DELETE && pass == 1)) {
			struct rt_msghdr *rtm;

			if (rt == NULL) {
				continue;
			}
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_DST] = sa = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if ((m = rt_msg1(cmd, &info)) == NULL) {
				continue;
			}
			rtm = mtod(m, struct rt_msghdr *);
			rtm->rtm_index = ifp->if_index;
			rtm->rtm_flags |= rt->rt_flags;
			rtm->rtm_errno = error;
			rtm->rtm_addrs = info.rti_addrs;
		}
		/* sp_protocol carries the address family of the subject. */
		route_proto.sp_protocol = sa ? sa->sa_family : 0;
		raw_input(m, &route_proto, &route_src, &route_dst);
	}
}
1649 
1650 /*
1651  * This is the analogue to the rt_newaddrmsg which performs the same
 * function but for multicast group memberships.  This is easier since
1653  * there is no route state to worry about.
1654  */
void
rt_newmaddrmsg(u_char cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* No routing-socket listeners; nothing to do. */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Lock ifp for if_lladdr */
	ifnet_lock_shared(ifp);
	bzero((caddr_t)&info, sizeof(info));
	IFMA_LOCK(ifma);
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	/* lladdr doesn't need lock */
	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;

	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
	    ifma->ifma_ll->ifma_addr : NULL;
	if ((m = rt_msg1(cmd, &info)) == NULL) {
		IFMA_UNLOCK(ifma);
		ifnet_lock_done(ifp);
		return;
	}
	ifmam = mtod(m, struct ifma_msghdr *);
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	/* sp_protocol carries the multicast address's family. */
	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1695 
1696 const char *
rtm2str(int cmd)1697 rtm2str(int cmd)
1698 {
1699 	const char *c __null_terminated = "RTM_?";
1700 
1701 	switch (cmd) {
1702 	case RTM_ADD:
1703 		c = "RTM_ADD";
1704 		break;
1705 	case RTM_DELETE:
1706 		c = "RTM_DELETE";
1707 		break;
1708 	case RTM_CHANGE:
1709 		c = "RTM_CHANGE";
1710 		break;
1711 	case RTM_GET:
1712 		c = "RTM_GET";
1713 		break;
1714 	case RTM_LOSING:
1715 		c = "RTM_LOSING";
1716 		break;
1717 	case RTM_REDIRECT:
1718 		c = "RTM_REDIRECT";
1719 		break;
1720 	case RTM_MISS:
1721 		c = "RTM_MISS";
1722 		break;
1723 	case RTM_LOCK:
1724 		c = "RTM_LOCK";
1725 		break;
1726 	case RTM_OLDADD:
1727 		c = "RTM_OLDADD";
1728 		break;
1729 	case RTM_OLDDEL:
1730 		c = "RTM_OLDDEL";
1731 		break;
1732 	case RTM_RESOLVE:
1733 		c = "RTM_RESOLVE";
1734 		break;
1735 	case RTM_NEWADDR:
1736 		c = "RTM_NEWADDR";
1737 		break;
1738 	case RTM_DELADDR:
1739 		c = "RTM_DELADDR";
1740 		break;
1741 	case RTM_IFINFO:
1742 		c = "RTM_IFINFO";
1743 		break;
1744 	case RTM_NEWMADDR:
1745 		c = "RTM_NEWMADDR";
1746 		break;
1747 	case RTM_DELMADDR:
1748 		c = "RTM_DELMADDR";
1749 		break;
1750 	case RTM_GET_SILENT:
1751 		c = "RTM_GET_SILENT";
1752 		break;
1753 	case RTM_IFINFO2:
1754 		c = "RTM_IFINFO2";
1755 		break;
1756 	case RTM_NEWMADDR2:
1757 		c = "RTM_NEWMADDR2";
1758 		break;
1759 	case RTM_GET2:
1760 		c = "RTM_GET2";
1761 		break;
1762 	case RTM_GET_EXT:
1763 		c = "RTM_GET_EXT";
1764 		break;
1765 	}
1766 
1767 	return c;
1768 }
1769 
1770 /*
1771  * This is used in dumping the kernel table via sysctl().
1772  */
/*
 * Radix-tree walker callback: format one route entry as an RTM_GET
 * (or RTM_GET2 for NET_RT_DUMP2) message and copy it out through the
 * sysctl request in the walkarg.  MAC addresses in RTF_LLINFO entries
 * are scrubbed unless the caller holds the neighbor-cache-read
 * entitlement and the route is not on loopback.
 *
 * Returns 0, or the SYSCTL_OUT error, which stops the walk.
 */
static int
sysctl_dumpentry(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;
	kauth_cred_t *credp;

	cred = current_cached_proc_cred(PROC_NULL);
	credp = &cred;

	RT_LOCK(rt);
	/* Flag-filtered dumps skip routes whose flags don't match w_arg. */
	if ((w->w_op == NET_RT_FLAGS || w->w_op == NET_RT_FLAGS_PRIV) &&
	    !(rt->rt_flags & w->w_arg)) {
		goto done;
	}

	/*
	 * If the matching route has RTF_LLINFO set, then we can skip scrubbing the MAC
	 * only if the outgoing interface is not loopback and the process has entitlement
	 * for neighbor cache read.
	 */
	if (w->w_op == NET_RT_FLAGS_PRIV && (rt->rt_flags & RTF_LLINFO)) {
		if (rt->rt_ifp != lo_ifp &&
		    (route_op_entitlement_check(NULL, cred, ROUTE_OP_READ, TRUE) == 0)) {
			/* NULL credp tells rt_msg2()/rtm_scrub() not to scrub. */
			credp = NULL;
		}
	}

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
	if (RT_HAS_IFADDR(rt)) {
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	if (w->w_op != NET_RT_DUMP2) {
		/* rt_msg2() renders the message into w->w_tmem. */
		size = rt_msg2(RTM_GET, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr *rtm =
			    (struct rt_msghdr *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_pid = 0;
			rtm->rtm_seq = 0;
			rtm->rtm_errno = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	} else {
		size = rt_msg2(RTM_GET2, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr2 *rtm =
			    (struct rt_msghdr2 *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_refcnt = rt->rt_refcnt;
			if (rt->rt_parent) {
				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
			} else {
				rtm->rtm_parentflags = 0;
			}
			rtm->rtm_reserved = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1855 
1856 /*
1857  * This is used for dumping extended information from route entries.
1858  */
/*
 * Radix-tree walker callback for extended dumps (RTM_GET_EXT):
 * like sysctl_dumpentry() but also fills in link-quality/reachability
 * info (rtm_ri) from the route's rt_llinfo_get_ri hook when present.
 *
 * Returns 0, or the SYSCTL_OUT error, which stops the walk.
 */
static int
sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;

	cred = current_cached_proc_cred(PROC_NULL);

	RT_LOCK(rt);
	/* Flag-filtered dumps skip routes whose flags don't match w_arg. */
	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
		goto done;
	}
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;

	/* rt_msg2() renders the message into w->w_tmem. */
	size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred);
	if (w->w_req != NULL && w->w_tmem != NULL) {
		struct rt_msghdr_ext *ertm =
		    (struct rt_msghdr_ext *)(void *)w->w_tmem;

		ertm->rtm_flags = rt->rt_flags;
		ertm->rtm_use = rt->rt_use;
		rt_getmetrics(rt, &ertm->rtm_rmx);
		ertm->rtm_index = rt->rt_ifp->if_index;
		ertm->rtm_pid = 0;
		ertm->rtm_seq = 0;
		ertm->rtm_errno = 0;
		ertm->rtm_addrs = info.rti_addrs;
		if (rt->rt_llinfo_get_ri == NULL) {
			/* No link-layer info hook: report "unknown" values. */
			bzero(&ertm->rtm_ri, sizeof(ertm->rtm_ri));
			ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
			ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
			ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
		} else {
			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
		}
		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1908 
1909 static boolean_t
should_include_clat46(void)1910 should_include_clat46(void)
1911 {
1912 #define CLAT46_ENTITLEMENT "com.apple.private.route.iflist.include-clat46"
1913 	return IOCurrentTaskHasEntitlement(CLAT46_ENTITLEMENT);
1914 }
1915 
1916 static boolean_t
is_clat46_address(struct ifaddr * ifa)1917 is_clat46_address(struct ifaddr *ifa)
1918 {
1919 	boolean_t       is_clat46 = FALSE;
1920 
1921 	if (ifa->ifa_addr->sa_family == AF_INET6) {
1922 		struct in6_ifaddr *ifa6 = ifatoia6(ifa);
1923 
1924 		is_clat46 = (ifa6->ia6_flags & IN6_IFF_CLAT46) != 0;
1925 	}
1926 	return is_clat46;
1927 }
1928 
1929 /*
1930  * rdar://9307819
1931  * To avoid to call copyout() while holding locks and to cause problems
 * in the paging path, sysctl_iflist() and sysctl_iflist2() construct
1933  * the list in two passes. In the first pass we compute the total
1934  * length of the data we are going to copyout, then we release
1935  * all locks to allocate a temporary buffer that gets filled
1936  * in the second pass.
1937  *
1938  * Note that we are verifying the assumption that kalloc() returns a buffer
1939  * that is at least 32 bits aligned and that the messages and addresses are
1940  * 32 bits aligned.
1941  */
/*
 * sysctl handler for NET_RT_IFLIST: emits one RTM_IFINFO message per
 * interface followed by one RTM_NEWADDR message per matching address.
 *
 * Runs in two passes (see block comment above): pass 0 computes the total
 * output size while holding the ifnet locks; pass 1 fills a temporary
 * kernel buffer which is copied out via SYSCTL_OUT() only after all locks
 * have been dropped.
 *
 * af: address family filter (0 == all families).
 * w:  walk context carrying the sysctl request and optional ifindex filter.
 * Returns 0 on success or an errno (typically ENOBUFS).
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct  rt_addrinfo info;
	int     error = 0;
	int     pass = 0;
	size_t  len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
	char    *total_buffer = NULL, *cp = NULL;
	kauth_cred_t cred __single;
	boolean_t include_clat46 = FALSE;
	/* TRUE once the entitlement has been checked (cached per request) */
	boolean_t include_clat46_valid = FALSE;

	cred = current_cached_proc_cred(PROC_NULL);

	bzero((caddr_t)&info, sizeof(info));

	/* pass 0 sizes the output; pass 1 fills the buffer */
	for (pass = 0; pass < 2; pass++) {
		ifnet_head_lock_shared();

		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (error) {
				break;
			}
			/* w_arg, when non-zero, restricts output to one ifindex */
			if (w->w_arg && w->w_arg != ifp->if_index) {
				continue;
			}
			ifnet_lock_shared(ifp);
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold the ifa
			 * lock.  The link address is always present; it's
			 * never freed.
			 */
			ifa = ifp->if_lladdr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred);
			if (pass == 0) {
				/* overflow-checked size accumulation */
				if (os_add_overflow(total_len, len, &total_len)) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
			} else {
				struct if_msghdr *ifm;

				/*
				 * Interfaces/addresses may have appeared
				 * between the passes; bail out rather than
				 * overrun the buffer sized in pass 0.
				 */
				if (current_len + len > total_len) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
				len = rt_msg2(RTM_IFINFO, &info,
				    (caddr_t)cp, NULL, &cred);
				info.rti_info[RTAX_IFP] = NULL;

				ifm = (struct if_msghdr *)(void *)cp;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = (u_short)ifp->if_flags;
				if_data_internal_to_if_data(ifp, &ifp->if_data,
				    &ifm->ifm_data);
				ifm->ifm_addrs = info.rti_addrs;
				/*
				 * <rdar://problem/32940901>
				 * Round bytes only for non-platform
				 */
				if (!csproc_get_platform_binary(w->w_req->p)) {
					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
				}

				cp += len;
				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
				current_len += len;
				VERIFY(current_len <= total_len);
			}
			/* walk the remaining (non-link) addresses of ifp */
			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
				boolean_t is_clat46;

				IFA_LOCK(ifa);
				if (af && af != ifa->ifa_addr->sa_family) {
					IFA_UNLOCK(ifa);
					continue;
				}
				/*
				 * CLAT46 addresses are hidden unless the
				 * caller holds the entitlement; the check
				 * is done lazily and cached in
				 * include_clat46 for this request.
				 */
				is_clat46 = is_clat46_address(ifa);
				if (is_clat46) {
					if (!include_clat46_valid) {
						include_clat46_valid = TRUE;
						include_clat46 =
						    should_include_clat46();
					}
					if (!include_clat46) {
						IFA_UNLOCK(ifa);
						continue;
					}
				}
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
				    &cred);
				if (pass == 0) {
					if (os_add_overflow(total_len, len, &total_len)) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
				} else {
					struct ifa_msghdr *ifam;

					if (current_len + len > total_len) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
					len = rt_msg2(RTM_NEWADDR, &info,
					    (caddr_t)cp, NULL, &cred);

					ifam = (struct ifa_msghdr *)(void *)cp;
					ifam->ifam_index =
					    ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;

					cp += len;
					VERIFY(IS_P2ALIGNED(cp,
					    sizeof(u_int32_t)));
					current_len += len;
					VERIFY(current_len <= total_len);
				}
				IFA_UNLOCK(ifa);
			}
			ifnet_lock_done(ifp);
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			    info.rti_info[RTAX_BRD] = NULL;
		}

		ifnet_head_done();

		if (error != 0) {
			if (error == ENOBUFS) {
				printf("%s: current_len (%lu) + len (%lu) > "
				    "total_len (%lu)\n", __func__, current_len,
				    len, total_len);
			}
			break;
		}

		if (pass == 0) {
			/* Better to return zero length buffer than ENOBUFS */
			if (total_len == 0) {
				total_len = 1;
			}
			/* pad by 12.5% to absorb growth between the passes */
			total_len += total_len >> 3;
			total_buffer_len = total_len;
			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
			if (total_buffer == NULL) {
				printf("%s: kalloc_data(%lu) failed\n", __func__,
				    total_len);
				error = ENOBUFS;
				break;
			}
			cp = total_buffer;
			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
		} else {
			/* all locks dropped; safe to fault in user pages */
			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
			if (error) {
				break;
			}
		}
	}

	if (total_buffer != NULL) {
		kfree_data(total_buffer, total_buffer_len);
	}

	return error;
}
2122 
2123 static int
sysctl_iflist2(int af,struct walkarg * w)2124 sysctl_iflist2(int af, struct walkarg *w)
2125 {
2126 	struct ifnet *ifp;
2127 	struct ifaddr *ifa;
2128 	struct  rt_addrinfo info;
2129 	int     error = 0;
2130 	int     pass = 0;
2131 	size_t  len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
2132 	char    *total_buffer = NULL, *cp = NULL;
2133 	kauth_cred_t cred __single;
2134 	boolean_t include_clat46 = FALSE;
2135 	boolean_t include_clat46_valid = FALSE;
2136 
2137 	cred = current_cached_proc_cred(PROC_NULL);
2138 
2139 	bzero((caddr_t)&info, sizeof(info));
2140 
2141 	for (pass = 0; pass < 2; pass++) {
2142 		struct ifmultiaddr *ifma;
2143 
2144 		ifnet_head_lock_shared();
2145 
2146 		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
2147 			if (error) {
2148 				break;
2149 			}
2150 			if (w->w_arg && w->w_arg != ifp->if_index) {
2151 				continue;
2152 			}
2153 			ifnet_lock_shared(ifp);
2154 			/*
2155 			 * Holding ifnet lock here prevents the link address
2156 			 * from changing contents, so no need to hold the ifa
2157 			 * lock.  The link address is always present; it's
2158 			 * never freed.
2159 			 */
2160 			ifa = ifp->if_lladdr;
2161 			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2162 			len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred);
2163 			if (pass == 0) {
2164 				if (os_add_overflow(total_len, len, &total_len)) {
2165 					ifnet_lock_done(ifp);
2166 					error = ENOBUFS;
2167 					break;
2168 				}
2169 			} else {
2170 				struct if_msghdr2 *ifm;
2171 
2172 				if (current_len + len > total_len) {
2173 					ifnet_lock_done(ifp);
2174 					error = ENOBUFS;
2175 					break;
2176 				}
2177 				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2178 				len = rt_msg2(RTM_IFINFO2, &info,
2179 				    (caddr_t)cp, NULL, &cred);
2180 				info.rti_info[RTAX_IFP] = NULL;
2181 
2182 				ifm = (struct if_msghdr2 *)(void *)cp;
2183 				ifm->ifm_addrs = info.rti_addrs;
2184 				ifm->ifm_flags = (u_short)ifp->if_flags;
2185 				ifm->ifm_index = ifp->if_index;
2186 				ifm->ifm_snd_len = IFCQ_LEN(ifp->if_snd);
2187 				ifm->ifm_snd_maxlen = IFCQ_MAXLEN(ifp->if_snd);
2188 				ifm->ifm_snd_drops =
2189 				    (int)ifp->if_snd->ifcq_dropcnt.packets;
2190 				ifm->ifm_timer = ifp->if_timer;
2191 				if_data_internal_to_if_data64(ifp,
2192 				    &ifp->if_data, &ifm->ifm_data);
2193 				/*
2194 				 * <rdar://problem/32940901>
2195 				 * Round bytes only for non-platform
2196 				 */
2197 				if (!csproc_get_platform_binary(w->w_req->p)) {
2198 					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
2199 					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
2200 				}
2201 
2202 				cp += len;
2203 				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2204 				current_len += len;
2205 				VERIFY(current_len <= total_len);
2206 			}
2207 			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
2208 				boolean_t is_clat46;
2209 
2210 				IFA_LOCK(ifa);
2211 				if (af && af != ifa->ifa_addr->sa_family) {
2212 					IFA_UNLOCK(ifa);
2213 					continue;
2214 				}
2215 				is_clat46 = is_clat46_address(ifa);
2216 				if (is_clat46) {
2217 					if (!include_clat46_valid) {
2218 						include_clat46_valid = TRUE;
2219 						include_clat46 =
2220 						    should_include_clat46();
2221 					}
2222 					if (!include_clat46) {
2223 						IFA_UNLOCK(ifa);
2224 						continue;
2225 					}
2226 				}
2227 				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2228 				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2229 				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2230 				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
2231 				    &cred);
2232 				if (pass == 0) {
2233 					if (os_add_overflow(total_len, len, &total_len)) {
2234 						IFA_UNLOCK(ifa);
2235 						error = ENOBUFS;
2236 						break;
2237 					}
2238 				} else {
2239 					struct ifa_msghdr *ifam;
2240 
2241 					if (current_len + len > total_len) {
2242 						IFA_UNLOCK(ifa);
2243 						error = ENOBUFS;
2244 						break;
2245 					}
2246 					len = rt_msg2(RTM_NEWADDR, &info,
2247 					    (caddr_t)cp, NULL, &cred);
2248 
2249 					ifam = (struct ifa_msghdr *)(void *)cp;
2250 					ifam->ifam_index =
2251 					    ifa->ifa_ifp->if_index;
2252 					ifam->ifam_flags = ifa->ifa_flags;
2253 					ifam->ifam_metric = ifa->ifa_metric;
2254 					ifam->ifam_addrs = info.rti_addrs;
2255 
2256 					cp += len;
2257 					VERIFY(IS_P2ALIGNED(cp,
2258 					    sizeof(u_int32_t)));
2259 					current_len += len;
2260 					VERIFY(current_len <= total_len);
2261 				}
2262 				IFA_UNLOCK(ifa);
2263 			}
2264 			if (error) {
2265 				ifnet_lock_done(ifp);
2266 				break;
2267 			}
2268 
2269 			for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
2270 			    ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
2271 				struct ifaddr *ifa0;
2272 
2273 				IFMA_LOCK(ifma);
2274 				if (af && af != ifma->ifma_addr->sa_family) {
2275 					IFMA_UNLOCK(ifma);
2276 					continue;
2277 				}
2278 				bzero((caddr_t)&info, sizeof(info));
2279 				info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2280 				/*
2281 				 * Holding ifnet lock here prevents the link
2282 				 * address from changing contents, so no need
2283 				 * to hold the ifa0 lock.  The link address is
2284 				 * always present; it's never freed.
2285 				 */
2286 				ifa0 = ifp->if_lladdr;
2287 				info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
2288 				if (ifma->ifma_ll != NULL) {
2289 					info.rti_info[RTAX_GATEWAY] =
2290 					    ifma->ifma_ll->ifma_addr;
2291 				}
2292 				len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL,
2293 				    &cred);
2294 				if (pass == 0) {
2295 					total_len += len;
2296 				} else {
2297 					struct ifma_msghdr2 *ifmam;
2298 
2299 					if (current_len + len > total_len) {
2300 						IFMA_UNLOCK(ifma);
2301 						error = ENOBUFS;
2302 						break;
2303 					}
2304 					len = rt_msg2(RTM_NEWMADDR2, &info,
2305 					    (caddr_t)cp, NULL, &cred);
2306 
2307 					ifmam =
2308 					    (struct ifma_msghdr2 *)(void *)cp;
2309 					ifmam->ifmam_addrs = info.rti_addrs;
2310 					ifmam->ifmam_flags = 0;
2311 					ifmam->ifmam_index =
2312 					    ifma->ifma_ifp->if_index;
2313 					ifmam->ifmam_refcount =
2314 					    ifma->ifma_reqcnt;
2315 
2316 					cp += len;
2317 					VERIFY(IS_P2ALIGNED(cp,
2318 					    sizeof(u_int32_t)));
2319 					current_len += len;
2320 				}
2321 				IFMA_UNLOCK(ifma);
2322 			}
2323 			ifnet_lock_done(ifp);
2324 			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2325 			    info.rti_info[RTAX_BRD] = NULL;
2326 		}
2327 		ifnet_head_done();
2328 
2329 		if (error) {
2330 			if (error == ENOBUFS) {
2331 				printf("%s: current_len (%lu) + len (%lu) > "
2332 				    "total_len (%lu)\n", __func__, current_len,
2333 				    len, total_len);
2334 			}
2335 			break;
2336 		}
2337 
2338 		if (pass == 0) {
2339 			/* Better to return zero length buffer than ENOBUFS */
2340 			if (total_len == 0) {
2341 				total_len = 1;
2342 			}
2343 			total_len += total_len >> 3;
2344 			total_buffer_len = total_len;
2345 			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
2346 			if (total_buffer == NULL) {
2347 				printf("%s: kalloc_data(%lu) failed\n", __func__,
2348 				    total_len);
2349 				error = ENOBUFS;
2350 				break;
2351 			}
2352 			cp = total_buffer;
2353 			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2354 		} else {
2355 			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
2356 			if (error) {
2357 				break;
2358 			}
2359 		}
2360 	}
2361 
2362 	if (total_buffer != NULL) {
2363 		kfree_data(total_buffer, total_buffer_len);
2364 	}
2365 
2366 	return error;
2367 }
2368 
2369 
2370 static int
sysctl_rtstat(struct sysctl_req * req)2371 sysctl_rtstat(struct sysctl_req *req)
2372 {
2373 	struct rtstat rtstat_compat = { 0 };
2374 
2375 #define RTSTAT_COMPAT(_field) rtstat_compat._field = rtstat._field < SHRT_MAX ? (short)rtstat._field : SHRT_MAX
2376 	RTSTAT_COMPAT(rts_badredirect);
2377 	RTSTAT_COMPAT(rts_dynamic);
2378 	RTSTAT_COMPAT(rts_newgateway);
2379 	RTSTAT_COMPAT(rts_unreach);
2380 	RTSTAT_COMPAT(rts_wildcard);
2381 	RTSTAT_COMPAT(rts_badrtgwroute);
2382 #undef RTSTAT_TO_COMPAT
2383 
2384 	return SYSCTL_OUT(req, &rtstat_compat, sizeof(struct rtstat));
2385 }
2386 
2387 static int
sysctl_rtstat_64(struct sysctl_req * req)2388 sysctl_rtstat_64(struct sysctl_req *req)
2389 {
2390 	return SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat_64));
2391 }
2392 
2393 static int
sysctl_rttrash(struct sysctl_req * req)2394 sysctl_rttrash(struct sysctl_req *req)
2395 {
2396 	return SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
2397 }
2398 
/*
 * Top-level handler for the PF_ROUTE sysctl tree.
 *
 * After skipping the leading element, name[] is laid out as:
 *   name[0]: address family filter (0 == all families)
 *   name[1]: operation (NET_RT_*)
 *   name[2]: per-operation argument (flags or ifindex)
 */
static int
sysctl_rtsock SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	DECLARE_SYSCTL_HANDLER_ARG_ARRAY(int, 4, name, namelen);
	struct radix_node_head *rnh;
	int     i, error = EINVAL;
	u_char  af;
	struct  walkarg w;

	name++;
	namelen--;
	/* This sysctl is read-only; refuse writes. */
	if (req->newptr) {
		return EPERM;
	}
	af = (u_char)name[0];
	Bzero(&w, sizeof(w));
	w.w_op = name[1];
	w.w_arg = name[2];
	w.w_req = req;

	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_DUMP2:
	case NET_RT_FLAGS:
	case NET_RT_FLAGS_PRIV:
		/* Walk every matching routing table under rnh_lock. */
		lck_mtx_lock(rnh_lock);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
			    (error = rnh->rnh_walktree(rnh,
			    sysctl_dumpentry, &w))) {
				break;
			}
		}
		lck_mtx_unlock(rnh_lock);
		break;
	case NET_RT_DUMPX:
	case NET_RT_DUMPX_FLAGS:
		/* Same walk, but emits extended (RTM_GET_EXT) records. */
		lck_mtx_lock(rnh_lock);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
			    (error = rnh->rnh_walktree(rnh,
			    sysctl_dumpentry_ext, &w))) {
				break;
			}
		}
		lck_mtx_unlock(rnh_lock);
		break;
	case NET_RT_IFLIST:
		error = sysctl_iflist(af, &w);
		break;
	case NET_RT_IFLIST2:
		error = sysctl_iflist2(af, &w);
		break;
	case NET_RT_STAT:
		error = sysctl_rtstat(req);
		break;
	case NET_RT_STAT_64:
		error = sysctl_rtstat_64(req);
		break;
	case NET_RT_TRASH:
		error = sysctl_rttrash(req);
		break;
	}
	/* Free the scratch buffer rt_msg2() may have grown for this walk. */
	if (w.w_tmem != NULL) {
		kfree_data_sized_by(w.w_tmem, w.w_tmemsize);
	}
	return error;
}
2468 
/*
 * Definitions of protocols supported in the ROUTE domain.
 */
static struct protosw routesw[] = {
	{
		.pr_type =              SOCK_RAW,
		.pr_protocol =          0,
		.pr_flags =             PR_ATOMIC | PR_ADDR,
		.pr_output =            route_output,
		.pr_ctlinput =          raw_ctlinput,
		.pr_usrreqs =           &route_usrreqs,
	}
};

/* Number of entries in routesw[]. */
static int route_proto_count = (sizeof(routesw) / sizeof(struct protosw));

/* The PF_ROUTE domain; its protocols are attached by route_dinit(). */
struct domain routedomain_s = {
	.dom_family =           PF_ROUTE,
	.dom_name =             "route",
	.dom_init =             route_dinit,
};
2490 
2491 static void
route_dinit(struct domain * dp)2492 route_dinit(struct domain *dp)
2493 {
2494 	struct protosw *pr;
2495 	int i;
2496 
2497 	VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
2498 	VERIFY(routedomain == NULL);
2499 
2500 	routedomain = dp;
2501 
2502 	for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) {
2503 		net_add_proto(pr, dp, 1);
2504 	}
2505 
2506 	route_init();
2507 }
2508