xref: /xnu-12377.41.6/bsd/net/radix.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1988, 1989, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)radix.c	8.4 (Berkeley) 11/2/94
61  * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $
62  */
63 
64 /*
65  * Routines to build and maintain radix trees for routing lookups.
66  */
67 #ifndef _RADIX_H_
68 #include <sys/cdefs.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/domain.h>
72 #include <sys/syslog.h>
73 #include <net/radix.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <kern/locks.h>
77 #endif
78 
79 static int      rn_walktree_from(struct radix_node_head *h, void *a,
80     void *m, walktree_f_t *f, void *w);
81 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
82 static struct radix_node *rn_insert(void *, struct radix_node_head *, int *, struct radix_node[2]);
83 static struct radix_node *rn_newpair(const void * __sized_by(vlen), uint8_t vlen, int, struct radix_node[2]);
84 static struct radix_node *rn_search(void *, struct radix_node *);
85 static struct radix_node *rn_search_m(void *, struct radix_node *, void *);
86 
87 static struct radix_mask *rn_mkfreelist;
88 static struct radix_node_head *mask_rnhead;
89 static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
90 
91 KALLOC_TYPE_DEFINE(radix_node_head_zone, struct radix_node_head, KT_DEFAULT);
92 
93 
94 #define MAX_KEYLEN 32
95 #define MAX_KEYLEN_BMASK 0x1F
96 
/*
 * Constant size buffers that are used for the netmask radix maintenance.
 *
 * rn_ones     - buffer with all bits set to 1, used when constructing new keys.
 * rn_zeros    - buffer with all bits set to 0, used when constructing new keys.
 */
110 static const char rn_zeros[MAX_KEYLEN] = {
111 	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
112 	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
113 	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
114 	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
115 };
116 
117 static const char rn_ones[MAX_KEYLEN] = {
118 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
119 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
120 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
121 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
122 };
123 
124 #define rn_masktop (mask_rnhead->rnh_treetop)
125 #undef Bcmp
126 #define Bcmp(a, b, l) \
127 	(l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (uint32_t)l))
128 
129 static int      rn_lexobetter(void *m_arg, void *n_arg);
130 static struct radix_mask * rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next);
131 static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
132     rn_matchf_t *f, void *w);
133 
134 #define RN_MATCHF(rn, f, arg)   (f == NULL || (*f)((rn), arg))
135 
136 
137 /*
138  * Netmask radix tree.
139  *
140  * Most netmasks are going to be same for the different routes.
141  * Because of that, it is important to avoid wasting memory
142  * for the duplicate copies of same mask bitpatterns.
143  *
144  * The radix datastructure solves this by using another radix tree,
145  * which is keyed by the netmask bits.
146  *
147  */
148 
/*
 * Radix tree glue.
 *
 * Every tree entry occupies a pair of radix_node structures: element 0
 * ("tt") is the leaf that carries the key, element 1 ("t") is the
 * interior node inserted alongside it.  The anonymous union lets the
 * pair be viewed either through named members or as a two-element
 * array (rnb_nodes), which is the shape rn_insert() expects.
 */
struct rn_base_entry {
	union {
		struct {
			struct radix_node tt;   /* leaf node (carries the key) */
			struct radix_node t;    /* interior node of the pair */
		} _split_nodes;
#define rnb_tt _split_nodes.tt
#define rnb_t _split_nodes.t
		struct radix_node rnb_nodes[2];
	};
};
163 
164 static struct radix_node *
rn_lexical_parent(struct radix_node * tt)165 rn_lexical_parent(struct radix_node *tt)
166 {
167 	struct rn_base_entry *nrn;
168 	nrn = __container_of(tt, struct rn_base_entry, rnb_tt);
169 	return &nrn->rnb_t;
170 }
171 
/*
 * The entry in the netmask radix tree.
 *
 * Combines the pair of radix nodes used to link the entry into the
 * mask tree with fixed-size storage for the mask bytes themselves.
 * The mask is stored in key form (first byte is its length).
 */
struct netmask_rn_entry {
	struct rn_base_entry nrn_base;
#define nrn_tt nrn_base.rnb_tt
#define nrn_t nrn_base.rnb_t
	char nrn_netmask[MAX_KEYLEN];   /* mask bytes, at most MAX_KEYLEN */
};
181 
182 /*
183  * The data structure for the keys is a radix tree with one way
184  * branching removed.  The index rn_bit at an internal node n represents a bit
185  * position to be tested.  The tree is arranged so that all descendants
186  * of a node n have keys whose bits all agree up to position rn_bit - 1.
187  * (We say the index of n is rn_bit.)
188  *
189  * There is at least one descendant which has a one bit at position rn_bit,
190  * and at least one with a zero there.
191  *
192  * A route is determined by a pair of key and mask.  We require that the
193  * bit-wise logical and of the key and mask to be the key.
194  * We define the index of a route to associated with the mask to be
195  * the first bit number in the mask where 0 occurs (with bit number 0
196  * representing the highest order bit).
197  *
198  * We say a mask is normal if every bit is 0, past the index of the mask.
199  * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
200  * and m is a normal mask, then the route applies to every descendant of n.
201  * If the index(m) < rn_bit, this implies the trailing last few bits of k
202  * before bit b are all 0, (and hence consequently true of every descendant
203  * of n), so the route applies to all descendants of the node as well.
204  *
205  * Similar logic shows that a non-normal mask m such that
206  * index(m) <= index(n) could potentially apply to many children of n.
207  * Thus, for each non-host route, we attach its mask to a list at an internal
208  * node as high in the tree as we can go.
209  *
210  * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
212  * a key satisfies a normal route, and also in remembering the unique leaf
213  * that governs a subtree.
214  */
215 
/*
 * Unpack a "packed" radix argument pointer.
 *
 * Radix keys and masks carry their own length in their first byte.
 * This helper reads that length byte into *arglen and forges a
 * bidi-indexable pointer whose bounds match the declared length, so
 * subsequent indexed accesses are bounds-checked.  A NULL argument
 * yields length 0 and a NULL pointer.
 */
static inline void*
__sized_by(*arglen)
__attribute__((always_inline))
rnarg_unpack(void *packed_arg, uint8_t *arglen)
{
	if (!packed_arg) {
		*arglen = 0;
		return NULL;
	}
	/* first byte of a key is its total length, including itself */
	*arglen = *((uint8_t * __single)packed_arg);
	return __unsafe_forge_bidi_indexable(void *, packed_arg, *arglen);
}
228 
229 static inline char
230 __attribute__((always_inline))
rnarg_get(caddr_t rnarg __sized_by (arglen),uint8_t arglen,unsigned int offset)231 rnarg_get(caddr_t rnarg __sized_by(arglen), uint8_t arglen, unsigned int offset)
232 {
233 	if (arglen <= offset) {
234 		return 0;
235 	}
236 	return rnarg[offset];
237 }
238 
239 
240 static struct radix_node *
rn_search(void * v_arg,struct radix_node * head)241 rn_search(void *v_arg, struct radix_node *head)
242 {
243 	struct radix_node *x = head;
244 	uint8_t vlen = 0;
245 	caddr_t v = rnarg_unpack(v_arg, &vlen);
246 
247 	while (x->rn_bit >= 0) {
248 		if (x->rn_bmask & rnarg_get(v, vlen, x->rn_offset)) {
249 			x = x->rn_right;
250 		} else {
251 			x = x->rn_left;
252 		}
253 	}
254 	return x;
255 }
256 
257 static struct radix_node *
rn_search_m(void * v_arg,struct radix_node * head,void * m_arg)258 rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
259 {
260 	struct radix_node *x = head;
261 	uint8_t vlen = 0;
262 	caddr_t v = rnarg_unpack(v_arg, &vlen);
263 	uint8_t mlen = 0;
264 	caddr_t m = rnarg_unpack(m_arg, &mlen);
265 
266 	while (x->rn_bit >= 0) {
267 		if ((x->rn_bmask & rnarg_get(m, mlen, x->rn_offset)) &&
268 		    (x->rn_bmask & rnarg_get(v, vlen, x->rn_offset))) {
269 			x = x->rn_right;
270 		} else {
271 			x = x->rn_left;
272 		}
273 	}
274 	return x;
275 }
276 
/*
 * rn_refines(m, n): return true iff mask `m' is a strict refinement
 * of mask `n' -- every bit set in n is also set in m, and m is not
 * identical to n.  Both arguments are length-delimited keys (first
 * byte is the length).
 */
int
rn_refines(void *m_arg, void *n_arg)
{
	uint8_t mlen, nlen;
	caddr_t m = rnarg_unpack(m_arg, &mlen);
	caddr_t n = rnarg_unpack(n_arg, &nlen);
	caddr_t lim, lim2 = lim = n + nlen;
	int longer = nlen - mlen;
	/* step past the length byte of each key */
	n++;
	m++;
	int masks_are_equal = 1;

	if (longer > 0) {
		lim -= longer;
	}
	/* over the overlap: every set bit of n must also be set in m */
	while (n < lim) {
		if (*n & ~(*m)) {
			return 0;
		}
		if (*n++ != *m++) {
			masks_are_equal = 0;
		}
	}
	/* a set bit in n beyond m's length means m cannot refine n */
	while (n < lim2) {
		if (*n++) {
			return 0;
		}
	}
	/* equal overlap: m refines n only if m has extra set bits */
	if (masks_are_equal && (longer < 0)) {
		for (lim2 = m - longer; m < lim2;) {
			if (*m++) {
				return 1;
			}
		}
	}
	return !masks_are_equal;
}
314 
/*
 * Look up the route exactly matching key `v_arg' and (optionally)
 * netmask `m_arg'.  Convenience wrapper around rn_lookup_args() with
 * no caller-supplied match function.
 */
struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
	return rn_lookup_args(v_arg, m_arg, head, NULL, NULL);
}
320 
321 struct radix_node *
rn_lookup_args(void * v_arg,void * m_arg,struct radix_node_head * head,rn_matchf_t * f,void * w)322 rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head,
323     rn_matchf_t *f, void *w)
324 {
325 	struct radix_node *x;
326 	caddr_t netmask = NULL;
327 
328 	if (m_arg) {
329 		x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
330 		if (x == 0) {
331 			return NULL;
332 		}
333 		/*
334 		 * Note: the auxillary mask is stored as a "key".
335 		 */
336 		netmask = rn_get_key(x);
337 	}
338 	x = rn_match_args(v_arg, head, f, w);
339 	if (x && netmask) {
340 		while (x && rn_get_mask(x) != netmask) {
341 			x = x->rn_dupedkey;
342 		}
343 	}
344 	return x;
345 }
346 
/*
 * Returns true if address 'trial' has no bits differing from the
 * leaf's key when compared under the leaf's mask.  In other words,
 * returns true when 'trial' matches leaf.  If a leaf-matching
 * routine is passed in, it is also used to find a match on the
 * conditions defined by the caller of rn_match.
 */
static int
rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
    rn_matchf_t *f, void *w)
{
	uint8_t cplen;
	const char *cp = rnarg_unpack(trial, &cplen);
	const char *cp2 = rn_get_key(leaf);
	const char *cp3 = rn_get_mask(leaf);
	const char *cplim;
	/* compare no further than the shorter of trial and key (length bytes) */
	int length = min(*cp, *cp2);

	if (cp3 == 0) {
		/* no mask on the leaf: compare under an all-ones mask */
		cp3 = rn_ones;
	} else {
		length = min(length, *cp3);
	}

	/* clamp to the size of the constant rn_ones buffer */
	length = min(length, MAX_KEYLEN);

	cplim = cp + length; cp3 += skip; cp2 += skip;
	for (cp += skip; cp < cplim; cp++, cp2++, cp3++) {
		/* any masked-in differing bit means no match */
		if ((*cp ^ *cp2) & *cp3) {
			return 0;
		}
	}

	return RN_MATCHF(leaf, f, w);
}
382 
/*
 * Find the best match for key `v_arg' in the tree.  Convenience
 * wrapper around rn_match_args() with no caller-supplied filter.
 */
struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
{
	return rn_match_args(v_arg, head, NULL, NULL);
}
388 
/*
 * Find the best matching route for key `v_arg', optionally filtered
 * by `f'/`w'.  The search descends to a candidate leaf first; on a
 * partial match it then walks the leaf's duped-key chain and climbs
 * the tree consulting attached mask lists.  Returns the matching
 * leaf, or NULL when nothing matches.
 */
struct radix_node *
rn_match_args(void *v_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	uint8_t vlen0;
	caddr_t v = rnarg_unpack(v_arg, &vlen0);
	struct radix_node *t = head->rnh_treetop, *x;
	caddr_t cp, cp2, cplim, key;
	struct radix_node *saved_t, *top = t;
	int off = t->rn_offset, matched_off;
	uint8_t klen, cmp_len;
	int test, b, rn_bit;

	/*
	 * Open code rn_search(v, top) to avoid overhead of extra
	 * subroutine call.
	 */
	for (; t->rn_bit >= 0;) {
		uint8_t test_byte = rnarg_get(v, vlen0, t->rn_offset);
		if (t->rn_bmask & test_byte) {
			t = t->rn_right;
		} else {
			t = t->rn_left;
		}
	}
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (rn_get_mask(t)) {
		cmp_len = rn_get_masklen(t);
	} else {
		cmp_len = vlen0;
	}

	/*
	 * Set the `cmp_len' to the minimal of the 3 lengths:
	 * - the length of t's mask (cmp_len)
	 * - the length of t's key (klen)
	 * - the length of the v argument (vlen)
	 */
	key = rn_get_key(t, &klen);
	cmp_len = (uint8_t)min(min(cmp_len, klen), vlen0);

	cp = v + off;
	cp2 = key + off;
	cplim = v + cmp_len;

	for (; cp < cplim; cp++, cp2++) {
		if (*cp != *cp2) {
			goto on1;
		}
	}
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT) {
		t = t->rn_dupedkey;
	}
	if (t == NULL || RN_MATCHF(t, f, w)) {
		return t;
	} else {
		/*
		 * Although we found an exact match on the key,
		 * f() is looking for some other criteria as well.
		 * Continue looking as if the exact match failed.
		 */
		if (t->rn_parent->rn_flags & RNF_ROOT) {
			/* Hit the top; have to give up */
			return NULL;
		}
		b = 0;
		goto keeplooking;
	}
on1:
	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;) {
		b--;
	}
keeplooking:
	/* b is now the absolute position of the first differing bit */
	matched_off = (int)(cp - v);
	b += matched_off << 3;
	/* leaf-style (negative) encoding of that bit index */
	rn_bit = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	saved_t = t;
	if (rn_get_mask(t) == 0) {
		t = t->rn_dupedkey;
	}
	for (; t; t = t->rn_dupedkey) {
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w)) {
				return t;
			}
		} else if (rn_satisfies_leaf(v, t, matched_off, f, w)) {
			return t;
		}
	}
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;
		t = t->rn_parent;
		m = t->rn_mklist;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		while (m) {
			if (m->rm_flags & RNF_NORMAL) {
				if ((rn_bit <= m->rm_bit) &&
				    RN_MATCHF(m->rm_leaf, f, w)) {
					return m->rm_leaf;
				}
			} else {
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(v, t, rm_get_mask(m));
				/* find the duped-key entry carrying this mask */
				while (x && rn_get_mask(x) != rm_get_mask(m)) {
					x = x->rn_dupedkey;
				}
				if (x && rn_satisfies_leaf(v, x, off, f, w)) {
					return x;
				}
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return NULL;
}
537 
538 #ifdef RN_DEBUG
539 int     rn_nodenum;
540 struct  radix_node *rn_clist;
541 int     rn_saveinfo;
542 int     rn_debug =  1;
543 #endif
544 
545 static struct radix_node *
rn_newpair(const void * v __sized_by (vlen),uint8_t vlen,int b,struct radix_node nodes[2])546 rn_newpair(const void *v __sized_by(vlen), uint8_t vlen, int b, struct radix_node nodes[2])
547 {
548 	struct radix_node *tt = &nodes[0];
549 	struct radix_node *t = &nodes[1];
550 
551 	t->rn_bit = (short)b;
552 	t->rn_bmask = 0x80 >> (b & 7);
553 	t->rn_left = tt;
554 	t->rn_offset = b >> 3;
555 	tt->rn_bit = -1;
556 	rn_set_key(tt, v, vlen);
557 
558 	tt->rn_parent = t;
559 	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
560 	tt->rn_mklist = t->rn_mklist = NULL;
561 #ifdef RN_DEBUG
562 	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
563 	tt->rn_twin = t;
564 	tt->rn_ybro = rn_clist;
565 	rn_clist = tt;
566 #endif
567 	return t;
568 }
569 
/*
 * Insert key `v_arg' into the tree using the caller-supplied pair of
 * nodes.  If the key is already present, *dupentry is set to 1 and
 * the existing leaf is returned.  Otherwise the new node pair is
 * linked in at the position determined by the first bit at which
 * `v_arg' differs from the leaf found by the search, *dupentry is
 * set to 0, and the new leaf is returned.
 */
static struct radix_node *
rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
    struct radix_node nodes[2])
{
	uint8_t vlen;
	caddr_t v = rnarg_unpack(v_arg, &vlen);
	struct radix_node *top = head->rnh_treetop;
	int head_off = top->rn_offset;
	struct radix_node *t = rn_search(v_arg, top);
	caddr_t cp = v + head_off;
	int b;
	struct radix_node *tt;
	uint8_t test_byte;
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	{
		caddr_t cp2 = rn_get_key(t) + head_off;
		int cmp_res;
		caddr_t cplim = v + vlen;

		while (cp < cplim) {
			if (*cp2++ != *cp++) {
				goto on1;
			}
		}
		/* all bytes equal: the key is already in the tree */
		*dupentry = 1;
		return t;
on1:
		*dupentry = 0;
		cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
		/* convert byte offset + bit position into absolute bit index b */
		for (b = (int)(cp - v) << 3; cmp_res; b--) {
			cmp_res >>= 1;
		}
	}
	{
		struct radix_node *p, *x = top;
		/*
		 * Re-descend until reaching a node that tests a bit at or
		 * below b.  Leaves have rn_bit < 0, so the (unsigned) cast
		 * makes them compare huge and always terminates the loop.
		 */
		do {
			p = x;
			test_byte = rnarg_get(v, vlen, x->rn_offset);
			if (x->rn_bmask & test_byte) {
				x = x->rn_right;
			} else {
				x = x->rn_left;
			}
		} while (b > (unsigned) x->rn_bit);
		/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
		}
#endif
		t = rn_newpair(v, vlen, b, nodes);
		tt = t->rn_left;
		/* splice the new interior node between p and x */
		test_byte = rnarg_get(v, vlen, p->rn_offset);
		if ((p->rn_bmask & test_byte) == 0) {
			p->rn_left = t;
		} else {
			p->rn_right = t;
		}
		x->rn_parent = t;
		t->rn_parent = p; /* frees x, p as temp vars below */
		/* put the new leaf on whichever side bit b of v selects */
		test_byte = rnarg_get(v, vlen, t->rn_offset);
		if ((t->rn_bmask & test_byte) == 0) {
			t->rn_right = x;
		} else {
			t->rn_right = tt;
			t->rn_left = x;
		}
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
		}
#endif
	}
	return tt;
}
647 
/*
 * Look up (search != 0) or insert (search == 0) netmask `n_arg' in
 * the shared netmask tree.  The mask is canonicalized first: the
 * skipped leading bytes are filled with ones and trailing zero bytes
 * are trimmed, so equal bit patterns share one entry.  On insertion
 * the mask's index (position of its first zero bit) is recorded in
 * rn_bit and RNF_NORMAL is set for contiguous masks.  Returns the
 * mask node; for a pure search, NULL if the mask is not present.
 */
struct radix_node *
rn_addmask(void *n_arg, int search, int skip)
{
	uint8_t mlen0;
	caddr_t netmask = rnarg_unpack(n_arg, &mlen0);
	struct radix_node *x __single;
	struct netmask_rn_entry *nrn_entry;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	uint8_t cmp_len;
	caddr_t key;
	int maskduplicated, m0, isnormal;

	char addmask_key[MAX_KEYLEN] = {0, };

	mlen = min(mlen0, MAX_KEYLEN);
	if (skip == 0) {
		skip = 1;
	}
	if (mlen <= skip) {
		/* nothing left of the mask: treat as the all-zeros mask */
		return mask_rnhead->rnh_nodes;
	}
	/* canonical form: skipped bytes (after the length byte) are all-ones */
	if (skip > 1) {
		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
	}
	if ((m0 = mlen) > skip) {
		bcopy(netmask + skip, addmask_key + skip, mlen - skip);
	}
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) {
		cp--;
	}
	mlen = (int)(cp - addmask_key);
	if (mlen <= skip) {
		return mask_rnhead->rnh_nodes;
	}

	/* first byte of the canonical key is its length */
	*addmask_key = (char)mlen;
	x = rn_search(addmask_key, rn_masktop);
	key = rn_get_key(x, &cmp_len);
	if (mlen < cmp_len) {
		cmp_len = (int8_t)mlen;
	}
	if (Bcmp(addmask_key, key, cmp_len) != 0) {
		x = NULL;
	}
	if (x || search) {
		return x;
	}
	/* not found and insertion requested: allocate a new mask entry */
	nrn_entry = kalloc_type(struct netmask_rn_entry, Z_WAITOK_ZERO_NOFAIL);
	netmask = nrn_entry->nrn_netmask;
	Bcopy(addmask_key, netmask, mlen);
	x = rn_insert(netmask, mask_rnhead, &maskduplicated, nrn_entry->nrn_base.rnb_nodes);
	if (maskduplicated) {
		/* rn_search just failed to find it, so this should not happen */
		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
		kfree_type(struct netmask_rn_entry, nrn_entry);
		return x;
	}
	mask_rnhead->rnh_cnt++;
	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	cplim = netmask + mlen;
	isnormal = 1;
	/* skip the leading run of all-ones bytes */
	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) {
		cp++;
	}
	if (cp != cplim) {
		/* count leading one bits in the first non-0xff byte */
		for (j = 0x80; (j & *cp) != 0; j >>= 1) {
			b++;
		}
		/* normal = contiguous ones followed only by zeros */
		if (*cp != normal_chars[b] || cp != (cplim - 1)) {
			isnormal = 0;
		}
	}
	b += (cp - netmask) << 3;
	x->rn_bit = (short)(-1 - b);
	if (isnormal) {
		x->rn_flags |= RNF_NORMAL;
	}
	return x;
}
732 
/*
 * Ordering predicate used to sort non-contiguous masks: returns 1 if
 * `m_arg' should sort before `n_arg'.  Longer masks sort first; for
 * equal lengths the first byte-wise greater byte wins.
 *
 * NOTE(review): bytes are compared as plain `char', which is signed
 * on this platform, so bytes >= 0x80 compare as negative; the
 * historical BSD code compared as u_char.  The ordering is applied
 * consistently on insert and delete, but confirm this signedness is
 * intentional.
 */
static int
/* XXX: arbitrary ordering for non-contiguous masks */
rn_lexobetter(void *m_arg, void *n_arg)
{
	uint8_t mplen, nlen;
	caddr_t mp = rnarg_unpack(m_arg, &mplen);
	caddr_t np = rnarg_unpack(n_arg, &nlen);

	/* first byte of each key is its length */
	if (*mp > *np) {
		return 1;  /* not really, but need to check longer one first */
	}
	if (*mp == *np) {
		for (int i = 1; i < mplen; ++i) {
			if (mp[i] > np[i]) {
				return 1;
			}
		}
	}
	return 0;
}
753 
754 static struct radix_mask *
rn_new_radix_mask(struct radix_node * tt,struct radix_mask * next)755 rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
756 {
757 	struct radix_mask *m;
758 
759 	MKGet(m);
760 	m->rm_bit = tt->rn_bit;
761 	m->rm_flags = tt->rn_flags;
762 	if (tt->rn_flags & RNF_NORMAL) {
763 		m->rm_leaf = tt;
764 	} else {
765 		rm_set_mask(m, rn_get_mask(tt), rn_get_masklen(tt));
766 	}
767 	m->rm_mklist = next;
768 	tt->rn_mklist = m;
769 	return m;
770 }
771 
/*
 * Add a route with key `v_arg' and optional netmask `n_arg' to the
 * tree, using the caller-supplied pair of tree nodes.  Duplicated
 * keys are kept on a chain sorted most- to least-specific mask, and
 * the route's mask is propagated to the highest ancestor node it can
 * apply to.  Returns the new leaf, or NULL if an identical
 * (key, mask) route already exists or the mask could not be entered.
 */
struct radix_node *
rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node treenodes[2])
{
	uint8_t vlen, mlen0;
	caddr_t v = rnarg_unpack(v_arg, &vlen);
	caddr_t netmask = rnarg_unpack(n_arg, &mlen0);
	uint8_t mlen = mlen0;
	struct radix_node *t, *x = NULL, *tt;
	struct radix_node *saved_tt, *top = head->rnh_treetop;
	short b = 0, b_leaf = 0;
	int keyduplicated;
	caddr_t mmask;
	struct radix_mask *m, **mp;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) {
			return NULL;
		}
		b_leaf = x->rn_bit;
		b = -1 - x->rn_bit;
		/*
		 * Note: the auxiliary mask is stored as a "key".
		 */
		netmask = rn_get_key(x, &mlen);
	}
	/*
	 * Deal with duplicated keys: attach node to previous instance
	 */
	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
	if (keyduplicated) {
		/* find the insertion point in the mask-sorted duped-key chain */
		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
			if (rn_get_mask(tt) == netmask) {
				/* identical (key, mask) route already present */
				return NULL;
			}
			if (netmask == 0 ||
			    (rn_get_mask(tt) != NULL &&
			    ((b_leaf < tt->rn_bit)  /* index(netmask) > node */
			    || rn_refines(netmask, rn_get_mask(tt))
			    || rn_lexobetter(netmask, rn_get_mask(tt))))) {
				break;
			}
		}
		/*
		 * If the mask is not duplicated, we wouldn't
		 * find it among possible duplicate key entries
		 * anyway, so the above test doesn't hurt.
		 *
		 * We sort the masks for a duplicated key the same way as
		 * in a masklist -- most specific to least specific.
		 * This may require the unfortunate nuisance of relocating
		 * the head of the list.
		 */
		if (tt == saved_tt) {
			struct  radix_node *xx = x;
			/* link in at head of list */
			(tt = treenodes)->rn_dupedkey = t;
			tt->rn_flags = t->rn_flags;
			tt->rn_parent = x = t->rn_parent;
			t->rn_parent = tt;                      /* parent */
			if (x->rn_left == t) {
				x->rn_left = tt;
			} else {
				x->rn_right = tt;
			}
			saved_tt = tt; x = xx;
		} else {
			/* link in behind t, mid-chain */
			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
			t->rn_dupedkey = tt;
			tt->rn_parent = t;                      /* parent */
			if (tt->rn_dupedkey) {                  /* parent */
				tt->rn_dupedkey->rn_parent = tt; /* parent */
			}
		}
#ifdef RN_DEBUG
		t = tt + 1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
#endif
		rn_set_key(tt, v, vlen);
		tt->rn_bit = -1;
		tt->rn_flags = RNF_ACTIVE;
	}
	head->rnh_cnt++;
	/*
	 * Put mask in tree.
	 */
	if (netmask) {
		rn_set_mask(tt, netmask, mlen);
		tt->rn_bit = x->rn_bit;
		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
	}
	t = saved_tt->rn_parent;
	if (keyduplicated) {
		goto on2;
	}
	b_leaf = -1 - t->rn_bit;
	/* x = the new leaf's sibling subtree */
	if (t->rn_right == saved_tt) {
		x = t->rn_left;
	} else {
		x = t->rn_right;
	}
	/* Promote general routes from below */
	if (x->rn_bit < 0) {
		for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) {
			if (rn_get_mask(x) != NULL && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
				*mp = m = rn_new_radix_mask(x, NULL);
				if (m) {
					mp = &m->rm_mklist;
				}
			}
		}
	} else if (x->rn_mklist) {
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
			if (m->rm_bit >= b_leaf) {
				break;
			}
		}
		/* move the remainder of the list up to the new interior node */
		t->rn_mklist = m; *mp = NULL;
	}
on2:
	/* Add new route to highest possible ancestor's list */
	if ((netmask == 0) || (b > t->rn_bit)) {
		return tt; /* can't lift at all */
	}
	b_leaf = tt->rn_bit;
	/* climb while the mask's index still covers the ancestor */
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m->rm_bit < b_leaf) {
			continue;
		}
		if (m->rm_bit > b_leaf) {
			break;
		}
		if (m->rm_flags & RNF_NORMAL) {
			mmask = rn_get_mask(m->rm_leaf);
			if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR,
				    "Non-unique normal route, mask not entered");
				return tt;
			}
		} else {
			mmask = rm_get_mask(m);
		}
		if (mmask == netmask) {
			/* same mask already annotated here: share it */
			m->rm_refs++;
			tt->rn_mklist = m;
			return tt;
		}
		if (rn_refines(netmask, mmask)
		    || rn_lexobetter(netmask, mmask)) {
			break;
		}
	}
	*mp = rn_new_radix_mask(tt, *mp);
	return tt;
}
947 
/*
 * Delete the leaf whose key matches `v_arg' (and, when `netmask_arg' is
 * non-NULL, the exact key/netmask pair) from the tree rooted at `head'.
 *
 * Returns the removed radix_node pair so the caller can free it, or NULL
 * when the entry is not present or the tree's mask annotations are found
 * to be inconsistent.  On success both halves of the removed pair have
 * RNF_ACTIVE cleared.
 *
 * Deletion runs in three stages:
 *   1. locate the candidate leaf with rn_search() and validate the key;
 *   2. unlink this route's radix_mask annotation from the mklist of the
 *      internal node the mask was "lifted" to at insertion time;
 *   3. splice the leaf (and its paired internal node) out of the tree,
 *      with separate handling for duped-key chains.
 */
struct radix_node *
rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
{
	uint8_t vlen, mlen0;
	/* Unpack the bounds-carrying (pointer, length) argument wrappers. */
	caddr_t v = rnarg_unpack(v_arg, &vlen);
	caddr_t netmask = rnarg_unpack(netmask_arg, &mlen0);
	uint8_t masklen = mlen0, key_cmp_len, tt_key_len;

	struct radix_node *t __single, *p __single, *x __single, *tt __single;
	struct radix_mask *m, *saved_m, **mp;
	struct radix_node *dupedkey, *saved_tt, *top;
	int b, head_off;

	x = top = head->rnh_treetop;
	/* rn_search() returns the only leaf that could possibly match `v'. */
	tt = saved_tt = rn_search(v, x);

	/*
	 * Verify that the found node (`tt'), is valid, and that it can
	 * be compared against `v_arg'.
	 */
	if (tt == NULL) {
		log(LOG_ERR, "rn_delete: key not found (key_len=%d)\n", vlen);
		return NULL;
	}

	head_off = x->rn_offset;
	tt_key_len = rn_get_keylen(tt);
	/* Only compare the bytes both keys actually contain. */
	key_cmp_len = (uint8_t)min(vlen, tt_key_len);

	if (key_cmp_len < head_off) {
		log(LOG_ERR, "rn_delete: key too short (cmp_len=%d, head_offset=%d)\n",
		    key_cmp_len, head_off);
		return NULL;
	}

	/* The search key must equal the leaf's key byte-for-byte past the
	 * tree-wide offset; otherwise the entry is simply not present. */
	if (Bcmp(v + head_off, rn_get_key(tt) + head_off, key_cmp_len - head_off)) {
		log(LOG_ERR, "rn_delete: key mismatch (cmp_len=%d, head_offset=%d)\n",
		    key_cmp_len, head_off);
		return NULL;
	}

	/*
	 * Delete our route from mask lists.
	 */
	if (netmask) {
		/* Canonicalize the mask via the shared mask tree, then walk
		 * the duped-key chain for the exact key/mask pair. */
		if ((x = rn_addmask(netmask, 1, head_off)) == 0) {
			return NULL;
		}
		netmask = rn_get_key(x, &masklen);
		while (rn_get_mask(tt) != netmask) {
			if ((tt = tt->rn_dupedkey) == 0) {
				return NULL;
			}
		}
	}
	/* No mask, or no mask annotation: nothing to unlink from mklists. */
	if (rn_get_mask(tt) == 0 || (saved_m = m = tt->rn_mklist) == 0) {
		goto on1;
	}
	if (tt->rn_flags & RNF_NORMAL) {
		/* A "normal" route's annotation must point back at this
		 * leaf and be unshared, or the lists are corrupt. */
		if (m->rm_leaf != tt || m->rm_refs > 0) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return NULL;  /* dangling ref could cause disaster */
		}
	} else {
		if (rm_get_mask(m) != rn_get_mask(tt)) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			goto on1;
		}
		/* Other routes still reference this shared mask entry. */
		if (--m->rm_refs >= 0) {
			goto on1;
		}
	}
	/* b is the bit index of this leaf; climb to the internal node the
	 * mask was lifted to when the route was inserted. */
	b = -1 - tt->rn_bit;
	t = saved_tt->rn_parent;
	if (b > t->rn_bit) {
		goto on1; /* Wasn't lifted at all */
	}
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/* Unlink and free our radix_mask from that node's mklist. */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m == saved_m) {
			*mp = m->rm_mklist;
			if (tt->rn_mklist == m) {
				tt->rn_mklist = *mp;
			}
			MKFree(m);
			break;
		}
	}
	if (m == 0) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL) {
			return NULL; /* Dangling ref to us */
		}
	}
on1:
	/*
	 * Eliminate us from tree
	 */
	if (tt->rn_flags & RNF_ROOT) {
		/* The all-zeros / all-ones sentinel leaves are permanent. */
		return NULL;
	}
	head->rnh_cnt--;
#ifdef RN_DEBUG
	/* Get us out of the creation list */
	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {
	}
	if (t) {
		t->rn_ybro = tt->rn_ybro;
	}
#endif
	t = tt->rn_parent;
	dupedkey = saved_tt->rn_dupedkey;
	if (dupedkey) {
		/*
		 * at this point, tt is the deletion target and saved_tt
		 * is the head of the dupekey chain
		 */
		if (tt == saved_tt) {
			/* remove from head of chain */
			x = dupedkey;
			x->rn_parent = t;
			if (t->rn_left == tt) {
				t->rn_left = x;
			} else {
				t->rn_right = x;
			}
		} else {
			/* find node in front of tt on the chain */
			for (x = p = saved_tt; p && p->rn_dupedkey != tt;) {
				p = p->rn_dupedkey;
			}
			if (p) {
				p->rn_dupedkey = tt->rn_dupedkey;
				if (tt->rn_dupedkey) {          /* parent */
					tt->rn_dupedkey->rn_parent = p;
				}
				/* parent */
			} else {
				log(LOG_ERR, "rn_delete: couldn't find us\n");
			}
		}
		/*
		 * tt's lexical-pair internal node may still be wired into
		 * the tree; if so, relocate its contents into the surviving
		 * chain member's pair node so tt's pair can be freed whole.
		 */
		t = rn_lexical_parent(tt);
		if (t->rn_flags & RNF_ACTIVE) {
#ifndef RN_DEBUG
			x = rn_lexical_parent(x);
			*x = *t;
			p = t->rn_parent;
#else
			b = t->rn_info;
			*++x = *t;
			t->rn_info = b;
			p = t->rn_parent;
#endif
			if (p->rn_left == t) {
				p->rn_left = x;
			} else {
				p->rn_right = x;
			}
			x->rn_left->rn_parent = x;
			x->rn_right->rn_parent = x;
		}
		goto out;
	}
	/* No duped keys: promote tt's sibling into the parent's slot. */
	if (t->rn_left == tt) {
		x = t->rn_right;
	} else {
		x = t->rn_left;
	}
	p = t->rn_parent;
	if (p->rn_right == t) {
		p->rn_right = x;
	} else {
		p->rn_left = x;
	}
	x->rn_parent = p;
	/*
	 * Demote routes attached to us.
	 */
	if (t->rn_mklist) {
		if (x->rn_bit >= 0) {
			/* Sibling is internal: append our mklist to its. */
			for (mp = &x->rn_mklist; (m = *mp);) {
				mp = &m->rm_mklist;
			}
			*mp = t->rn_mklist;
		} else {
			/* If there are any key,mask pairs in a sibling
			 *  duped-key chain, some subset will appear sorted
			 *  in the same order attached to our mklist */
			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) {
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;
					x->rn_mklist = NULL;
					if (--(m->rm_refs) < 0) {
						MKFree(m);
					}
					m = mm;
				}
			}
			if (m) {
				log(LOG_ERR, "rn_delete: Orphaned Mask "
				    "0x%llx at 0x%llx\n",
				    (uint64_t)VM_KERNEL_ADDRPERM(m),
				    (uint64_t)VM_KERNEL_ADDRPERM(x));
			}
		}
	}
	/*
	 * We may be holding an active internal node in the tree.
	 */
	x = rn_lexical_parent(tt);
	if (t != x) {
		/* tt's pair node `x' is still live in the tree: copy it
		 * over the just-detached `t' and repoint x's children and
		 * parent at t, so the tt/x pair can be freed together. */
#ifndef RN_DEBUG
		*t = *x;
#else
		b = t->rn_info;
		*t = *x;
		t->rn_info = b;
#endif
		t->rn_left->rn_parent = t;
		t->rn_right->rn_parent = t;
		p = x->rn_parent;
		if (p->rn_left == x) {
			p->rn_left = t;
		} else {
			p->rn_right = t;
		}
	}
out:
	/* Mark both halves of the deleted pair inactive and return the
	 * leaf to the caller for freeing. */
	x = rn_lexical_parent(tt);
	x->rn_flags &= ~RNF_ACTIVE;
	tt->rn_flags &= ~RNF_ACTIVE;

	return tt;
}
1185 
/*
 * This is the same as rn_walktree() except for the parameters and the
 * exit: the walk is restricted to the subtree selected by address `a'
 * under mask `m', i.e. only leaves at or below that prefix are visited.
 * Returns 0 on a complete walk, or the first non-zero value returned
 * by the callback `f' (invoked as (*f)(leaf, w)).
 */
static int
rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f,
    void *w)
{
	int error;
	uint8_t alen, mlen;
	struct radix_node *base, *next;
	/* Unpack the bounds-carrying (pointer, length) argument wrappers. */
	caddr_t xa = rnarg_unpack(a, &alen);
	caddr_t xm = rnarg_unpack(m, &mlen);
	struct radix_node *rn, *last;
	int stopping;
	int lastb;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node.  When that happens, we would end up referring
	 * to an already-freed radix node as the successor node.  To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	last = NULL;
	stopping = 0;
	/* Snapshot the node count; shrinkage means a deletion happened. */
	rnh_cnt = h->rnh_cnt;

	/*
	 * rn_search_m is sort-of-open-coded here.
	 */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0;) {
		last = rn;
		uint8_t test_byte;
		/* Stop descending once the mask no longer covers the bit
		 * this internal node tests. */
		test_byte = rnarg_get(xm, mlen, rn->rn_offset);
		if (!(rn->rn_bmask & test_byte)) {
			break;
		}
		test_byte = rnarg_get(xa, alen, rn->rn_offset);
		if (rn->rn_bmask & test_byte) {
			rn = rn->rn_right;
		} else {
			rn = rn->rn_left;
		}
	}

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the last node we looked at.
	 * Either way, last is the node we want to start from.
	 */
	rn = last;
	/* lastb bounds the walk: climbing past this bit leaves the subtree. */
	lastb = rn->rn_bit;

	/* First time through node, go left */
	while (rn->rn_bit >= 0) {
		rn = rn->rn_left;
	}

	while (!stopping) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		    && !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit <= lastb) {
				stopping = 1;
				/*
				 * XXX we should jump to the 'Process leaves'
				 * part, because the values of 'rn' and 'next'
				 * we compute will not be used. Not a big deal
				 * because this loop will terminate, but it is
				 * inefficient and hard to understand!
				 */
			}
		}

		/*
		 * The following code (bug fix) inherited from FreeBSD is
		 * currently disabled, because our implementation uses the
		 * RTF_PRCLONING scheme that has been abandoned in current
		 * FreeBSD release.  The scheme involves setting such a flag
		 * for the default route entry, and therefore all off-link
		 * destinations would become clones of that entry.  Enabling
		 * the following code would be problematic at this point,
		 * because the removal of default route would cause only
		 * the left-half of the tree to be traversed, leaving the
		 * right-half untouched.  If there are clones of the entry
		 * that reside in that right-half, they would not be deleted
		 * and would linger around until they expire or explicitly
		 * deleted, which is a very bad thing.
		 *
		 * This code should be uncommented only after we get rid
		 * of the RTF_PRCLONING scheme.
		 */
#if 0
		/*
		 * At the top of the tree, no need to traverse the right
		 * half, prevent the traversal of the entire tree in the
		 * case of default route.
		 */
		if (rn->rn_parent->rn_flags & RNF_ROOT) {
			stopping = 1;
		}
#endif

		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) {
			rn = rn->rn_left;
		}
		next = rn;
		/* Process leaves */
		while ((rn = base) != 0) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w))) {
				return error;
			}
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt) {
			goto restart;
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT) {
			stopping = 1;
		}
	}
	return 0;
}
1325 
1326 static int
rn_walktree(struct radix_node_head * h,walktree_f_t * f,void * w)1327 rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
1328 {
1329 	int error;
1330 	struct radix_node *base, *next;
1331 	struct radix_node *rn;
1332 	int rnh_cnt;
1333 
1334 	/*
1335 	 * This gets complicated because we may delete the node while
1336 	 * applying the function f to it; we cannot simply use the next
1337 	 * leaf as the successor node in advance, because that leaf may
1338 	 * be removed as well during deletion when it is a clone of the
1339 	 * current node.  When that happens, we would end up referring
1340 	 * to an already-freed radix node as the successor node.  To get
1341 	 * around this issue, if we detect that the radix tree has changed
1342 	 * in dimension (smaller than before), we simply restart the walk
1343 	 * from the top of tree.
1344 	 */
1345 restart:
1346 	rn = h->rnh_treetop;
1347 	rnh_cnt = h->rnh_cnt;
1348 
1349 	/* First time through node, go left */
1350 	while (rn->rn_bit >= 0) {
1351 		rn = rn->rn_left;
1352 	}
1353 	for (;;) {
1354 		base = rn;
1355 		/* If at right child go back up, otherwise, go right */
1356 		while (rn->rn_parent->rn_right == rn &&
1357 		    (rn->rn_flags & RNF_ROOT) == 0) {
1358 			rn = rn->rn_parent;
1359 		}
1360 		/* Find the next *leaf* to start from */
1361 		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) {
1362 			rn = rn->rn_left;
1363 		}
1364 		next = rn;
1365 		/* Process leaves */
1366 		while ((rn = base) != NULL) {
1367 			base = rn->rn_dupedkey;
1368 			if (!(rn->rn_flags & RNF_ROOT)
1369 			    && (error = (*f)(rn, w))) {
1370 				return error;
1371 			}
1372 		}
1373 		/* If one or more nodes got deleted, restart from top */
1374 		if (h->rnh_cnt < rnh_cnt) {
1375 			goto restart;
1376 		}
1377 		rn = next;
1378 		if (rn->rn_flags & RNF_ROOT) {
1379 			return 0;
1380 		}
1381 	}
1382 	/* NOTREACHED */
1383 }
1384 
1385 int
rn_inithead(void ** head,int off)1386 rn_inithead(void **head, int off)
1387 {
1388 	struct radix_node_head *rnh;
1389 	struct radix_node *t, *tt, *ttt;
1390 	if (off > INT8_MAX) {
1391 		return 0;
1392 	}
1393 	if (*head) {
1394 		return 1;
1395 	}
1396 
1397 	rnh = zalloc_flags(radix_node_head_zone, Z_WAITOK_ZERO_NOFAIL);
1398 	*head = rnh;
1399 	t = rn_newpair(rn_zeros, (int8_t)MAX_KEYLEN, off, rnh->rnh_nodes);
1400 	ttt = rnh->rnh_nodes + 2;
1401 	t->rn_right = ttt;
1402 	t->rn_parent = t;
1403 	tt = t->rn_left;
1404 	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
1405 	tt->rn_bit = (short)(-1 - off);
1406 	*ttt = *tt;
1407 	rn_set_key(ttt, rn_ones, (int8_t)MAX_KEYLEN);
1408 	rnh->rnh_addaddr = rn_addroute;
1409 	rnh->rnh_deladdr = rn_delete;
1410 	rnh->rnh_matchaddr = rn_match;
1411 	rnh->rnh_matchaddr_args = rn_match_args;
1412 	rnh->rnh_lookup = rn_lookup;
1413 	rnh->rnh_lookup_args = rn_lookup_args;
1414 	rnh->rnh_walktree = rn_walktree;
1415 	rnh->rnh_walktree_from = rn_walktree_from;
1416 	rnh->rnh_treetop = t;
1417 	rnh->rnh_cnt = 3;
1418 	return 1;
1419 }
1420 
1421 void
rn_init(void)1422 rn_init(void)
1423 {
1424 	struct domain *dom;
1425 
1426 	/*
1427 	 * Validate that no domain has max key that exceeds the MAX_KEYLEN constant.
1428 	 * This is really not expected to happen unless we introduce a new domain.
1429 	 * In such case, the MAX_KEYLEN constant will need to be updated,
1430 	 * along with the layout of `struct rn_base_entry'.
1431 	 *
1432 	 * N.B. lock already held when rn_init is called.
1433 	 */
1434 	TAILQ_FOREACH(dom, &domains, dom_entry) {
1435 		if (MAX_KEYLEN < dom->dom_maxrtkey) {
1436 			log(LOG_ERR, "rn_init: encountered domain %s with max key len %d exceeding the limit %d",
1437 			    dom->dom_name,
1438 			    dom->dom_maxrtkey,
1439 			    MAX_KEYLEN);
1440 			return;
1441 		}
1442 	}
1443 
1444 	if (rn_inithead((void **)&mask_rnhead, 0) == 0) {
1445 		panic("rn_init 2");
1446 	}
1447 }
1448