1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988, 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)radix.c 8.4 (Berkeley) 11/2/94
61 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $
62 */
63
64 /*
65 * Routines to build and maintain radix trees for routing lookups.
66 */
67 #ifndef _RADIX_H_
68 #include <sys/cdefs.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/domain.h>
72 #include <sys/syslog.h>
73 #include <net/radix.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <kern/locks.h>
77 #endif
78
79 static int rn_walktree_from(struct radix_node_head *h, void *a,
80 void *m, walktree_f_t *f, void *w);
81 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
82 static struct radix_node *rn_insert(void *, struct radix_node_head *, int *, struct radix_node[2]);
83 static struct radix_node *rn_newpair(const void * __sized_by(vlen), uint8_t vlen, int, struct radix_node[2]);
84 static struct radix_node *rn_search(void *, struct radix_node *);
85 static struct radix_node *rn_search_m(void *, struct radix_node *, void *);
86
87 static struct radix_mask *rn_mkfreelist;
88 static struct radix_node_head *mask_rnhead;
89 static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
90
91 KALLOC_TYPE_DEFINE(radix_node_head_zone, struct radix_node_head, KT_DEFAULT);
92
93
94 #define MAX_KEYLEN 32
95 #define MAX_KEYLEN_BMASK 0x1F
96
97 /*
98 * Constant size buffers that are used for the netmask radix maintenance.
99 *
100 * rn_ones - buffer with all bits set to 1, used when constructing new keys.
101 * rn_zeros - buffer with all bits set to 0, used when constructing new keys.
102 */
103
104 /*
105 * Constant size buffers that are used for the netmask radix maintenance.
106 *
107 * rn_ones - buffer with all bits set to 1, used when constructing new keys.
108 * rn_zeros - buffer with all bits set to 0, used when constructing new keys.
109 */
110 static const char rn_zeros[MAX_KEYLEN] = {
111 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
112 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
113 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
114 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
115 };
116
117 static const char rn_ones[MAX_KEYLEN] = {
118 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
119 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
120 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
121 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
122 };
123
124 #define rn_masktop (mask_rnhead->rnh_treetop)
125 #undef Bcmp
126 #define Bcmp(a, b, l) \
127 (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (uint32_t)l))
128
129 static int rn_lexobetter(void *m_arg, void *n_arg);
130 static struct radix_mask * rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next);
131 static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
132 rn_matchf_t *f, void *w);
133
134 #define RN_MATCHF(rn, f, arg) (f == NULL || (*f)((rn), arg))
135
136
137 /*
138 * Netmask radix tree.
139 *
140 * Most netmasks are going to be same for the different routes.
141 * Because of that, it is important to avoid wasting memory
142 * for the duplicate copies of same mask bitpatterns.
143 *
144 * The radix datastructure solves this by using another radix tree,
145 * which is keyed by the netmask bits.
146 *
147 */
148
149 /*
150 * Radix tree glue.
151 */
152 struct rn_base_entry {
153 union {
154 struct {
155 struct radix_node tt;
156 struct radix_node t;
157 } _split_nodes;
158 #define rnb_tt _split_nodes.tt
159 #define rnb_t _split_nodes.t
160 struct radix_node rnb_nodes[2];
161 };
162 };
163
164 static struct radix_node *
rn_lexical_parent(struct radix_node * tt)165 rn_lexical_parent(struct radix_node *tt)
166 {
167 struct rn_base_entry *nrn;
168 nrn = __container_of(tt, struct rn_base_entry, rnb_tt);
169 return &nrn->rnb_t;
170 }
171
172 /*
173 * The entry in the netmask radix tree.
174 */
175 struct netmask_rn_entry {
176 struct rn_base_entry nrn_base;
177 #define nrn_tt nrn_base.rnb_tt
178 #define nrn_t nrn_base.rnb_t
179 char nrn_netmask[MAX_KEYLEN];
180 };
181
182 /*
183 * The data structure for the keys is a radix tree with one way
184 * branching removed. The index rn_bit at an internal node n represents a bit
185 * position to be tested. The tree is arranged so that all descendants
186 * of a node n have keys whose bits all agree up to position rn_bit - 1.
187 * (We say the index of n is rn_bit.)
188 *
189 * There is at least one descendant which has a one bit at position rn_bit,
190 * and at least one with a zero there.
191 *
192 * A route is determined by a pair of key and mask. We require that the
193 * bit-wise logical and of the key and mask to be the key.
194 * We define the index of a route to associated with the mask to be
195 * the first bit number in the mask where 0 occurs (with bit number 0
196 * representing the highest order bit).
197 *
198 * We say a mask is normal if every bit is 0, past the index of the mask.
199 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
200 * and m is a normal mask, then the route applies to every descendant of n.
201 * If the index(m) < rn_bit, this implies the trailing last few bits of k
202 * before bit b are all 0, (and hence consequently true of every descendant
203 * of n), so the route applies to all descendants of the node as well.
204 *
205 * Similar logic shows that a non-normal mask m such that
206 * index(m) <= index(n) could potentially apply to many children of n.
207 * Thus, for each non-host route, we attach its mask to a list at an internal
208 * node as high in the tree as we can go.
209 *
210 * The present version of the code makes use of normal routes in short-
211 * circuiting an explict mask and compare operation when testing whether
212 * a key satisfies a normal route, and also in remembering the unique leaf
213 * that governs a subtree.
214 */
215
/*
 * Unpack a length-prefixed radix argument (sockaddr-style key) into a
 * bounds-checked pointer.  The first byte of the argument holds the
 * total length; *arglen receives it (0 for a NULL argument).
 */
static inline void*
__sized_by(*arglen)
__attribute__((always_inline))
rnarg_unpack(void *packed_arg, uint8_t *arglen)
{
	if (!packed_arg) {
		*arglen = 0;
		return NULL;
	}
	/* The length lives in the leading byte of the key itself. */
	*arglen = *((uint8_t * __single)packed_arg);
	return __unsafe_forge_bidi_indexable(void *, packed_arg, *arglen);
}
228
229 static inline char
230 __attribute__((always_inline))
rnarg_get(caddr_t rnarg __sized_by (arglen),uint8_t arglen,unsigned int offset)231 rnarg_get(caddr_t rnarg __sized_by(arglen), uint8_t arglen, unsigned int offset)
232 {
233 if (arglen <= offset) {
234 return 0;
235 }
236 return rnarg[offset];
237 }
238
239
240 static struct radix_node *
rn_search(void * v_arg,struct radix_node * head)241 rn_search(void *v_arg, struct radix_node *head)
242 {
243 struct radix_node *x = head;
244 uint8_t vlen = 0;
245 caddr_t v = rnarg_unpack(v_arg, &vlen);
246
247 while (x->rn_bit >= 0) {
248 if (x->rn_bmask & rnarg_get(v, vlen, x->rn_offset)) {
249 x = x->rn_right;
250 } else {
251 x = x->rn_left;
252 }
253 }
254 return x;
255 }
256
257 static struct radix_node *
rn_search_m(void * v_arg,struct radix_node * head,void * m_arg)258 rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
259 {
260 struct radix_node *x = head;
261 uint8_t vlen = 0;
262 caddr_t v = rnarg_unpack(v_arg, &vlen);
263 uint8_t mlen = 0;
264 caddr_t m = rnarg_unpack(m_arg, &mlen);
265
266 while (x->rn_bit >= 0) {
267 if ((x->rn_bmask & rnarg_get(m, mlen, x->rn_offset)) &&
268 (x->rn_bmask & rnarg_get(v, vlen, x->rn_offset))) {
269 x = x->rn_right;
270 } else {
271 x = x->rn_left;
272 }
273 }
274 return x;
275 }
276
/*
 * rn_refines(m_arg, n_arg):
 *
 * Return 1 if netmask m is strictly more specific than netmask n,
 * i.e. every bit set in n is also set in m and the two masks differ.
 * Both arguments are length-prefixed keys (first byte is the length).
 */
int
rn_refines(void *m_arg, void *n_arg)
{
	uint8_t mlen, nlen;
	caddr_t m = rnarg_unpack(m_arg, &mlen);
	caddr_t n = rnarg_unpack(n_arg, &nlen);
	caddr_t lim, lim2 = lim = n + nlen;
	int longer = nlen - mlen;	/* > 0 when n is the longer mask */
	n++;	/* skip the length byte */
	m++;
	int masks_are_equal = 1;

	/* Compare only the overlapping portion of the two masks. */
	if (longer > 0) {
		lim -= longer;
	}
	while (n < lim) {
		/* A bit set in n but clear in m means m cannot refine n. */
		if (*n & ~(*m)) {
			return 0;
		}
		if (*n++ != *m++) {
			masks_are_equal = 0;
		}
	}
	/* Any set bit in n's tail (beyond m's length) also disqualifies m. */
	while (n < lim2) {
		if (*n++) {
			return 0;
		}
	}
	/*
	 * Overlapping bytes match exactly; m refines n only if m's tail
	 * (beyond n's length) carries additional set bits.
	 */
	if (masks_are_equal && (longer < 0)) {
		for (lim2 = m - longer; m < lim2;) {
			if (*m++) {
				return 1;
			}
		}
	}
	return !masks_are_equal;
}
314
/*
 * rn_lookup(v_arg, m_arg, head):
 *
 * Exact lookup of the (key, netmask) pair; convenience wrapper around
 * rn_lookup_args() with no caller-supplied match function.
 */
struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
	return rn_lookup_args(v_arg, m_arg, head, NULL, NULL);
}
320
321 struct radix_node *
rn_lookup_args(void * v_arg,void * m_arg,struct radix_node_head * head,rn_matchf_t * f,void * w)322 rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head,
323 rn_matchf_t *f, void *w)
324 {
325 struct radix_node *x;
326 caddr_t netmask = NULL;
327
328 if (m_arg) {
329 x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
330 if (x == 0) {
331 return NULL;
332 }
333 /*
334 * Note: the auxillary mask is stored as a "key".
335 */
336 netmask = rn_get_key(x);
337 }
338 x = rn_match_args(v_arg, head, f, w);
339 if (x && netmask) {
340 while (x && rn_get_mask(x) != netmask) {
341 x = x->rn_dupedkey;
342 }
343 }
344 return x;
345 }
346
347 /*
348 * Returns true if address 'trial' has no bits differing from the
349 * leaf's key when compared under the leaf's mask. In other words,
350 * returns true when 'trial' matches leaf. If a leaf-matching
351 * routine is passed in, it is also used to find a match on the
352 * conditions defined by the caller of rn_match.
353 */
354 static int
rn_satisfies_leaf(char * trial,struct radix_node * leaf,int skip,rn_matchf_t * f,void * w)355 rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
356 rn_matchf_t *f, void *w)
357 {
358 uint8_t cplen;
359 const char *cp = rnarg_unpack(trial, &cplen);
360 const char *cp2 = rn_get_key(leaf);
361 const char *cp3 = rn_get_mask(leaf);
362 const char *cplim;
363 int length = min(*cp, *cp2);
364
365 if (cp3 == 0) {
366 cp3 = rn_ones;
367 } else {
368 length = min(length, *cp3);
369 }
370
371 length = min(length, MAX_KEYLEN);
372
373 cplim = cp + length; cp3 += skip; cp2 += skip;
374 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) {
375 if ((*cp ^ *cp2) & *cp3) {
376 return 0;
377 }
378 }
379
380 return RN_MATCHF(leaf, f, w);
381 }
382
/*
 * rn_match(v_arg, head):
 *
 * Longest-prefix match of the key `v_arg' in `head', with no extra
 * caller-supplied filtering.
 */
struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
{
	return rn_match_args(v_arg, head, NULL, NULL);
}
388
/*
 * rn_match_args(v_arg, head, f, w):
 *
 * Longest-prefix match of the length-prefixed key `v_arg' in `head'.
 * If `f' is non-NULL it is invoked (with `w') on each candidate leaf,
 * and a leaf is returned only when f accepts it.  Returns NULL when
 * nothing in the tree matches.
 */
struct radix_node *
rn_match_args(void *v_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	uint8_t vlen0;
	caddr_t v = rnarg_unpack(v_arg, &vlen0);
	struct radix_node *t = head->rnh_treetop, *x;
	caddr_t cp, cp2, cplim, key;
	struct radix_node *saved_t, *top = t;
	int off = t->rn_offset, matched_off;
	uint8_t klen, cmp_len;
	int test, b, rn_bit;

	/*
	 * Open code rn_search(v, top) to avoid overhead of extra
	 * subroutine call.
	 */
	for (; t->rn_bit >= 0;) {
		uint8_t test_byte = rnarg_get(v, vlen0, t->rn_offset);
		if (t->rn_bmask & test_byte) {
			t = t->rn_right;
		} else {
			t = t->rn_left;
		}
	}
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one. This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (rn_get_mask(t)) {
		cmp_len = rn_get_masklen(t);
	} else {
		cmp_len = vlen0;
	}

	/*
	 * Set the `cmp_len' to the minimal of the 3 lengths:
	 * - the length of t's mask (cmp_len)
	 * - the length of t's key (klen)
	 * - the length of the v argument (vlen)
	 */
	key = rn_get_key(t, &klen);
	cmp_len = (uint8_t)min(min(cmp_len, klen), vlen0);

	cp = v + off;
	cp2 = key + off;
	cplim = v + cmp_len;

	for (; cp < cplim; cp++, cp2++) {
		if (*cp != *cp2) {
			goto on1;
		}
	}
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT) {
		t = t->rn_dupedkey;
	}
	if (t == NULL || RN_MATCHF(t, f, w)) {
		return t;
	} else {
		/*
		 * Although we found an exact match on the key,
		 * f() is looking for some other criteria as well.
		 * Continue looking as if the exact match failed.
		 */
		if (t->rn_parent->rn_flags & RNF_ROOT) {
			/* Hit the top; have to give up */
			return NULL;
		}
		b = 0;
		goto keeplooking;
	}
on1:
	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;) {
		b--;
	}
keeplooking:
	/* b now holds the absolute index of the first differing bit. */
	matched_off = (int)(cp - v);
	b += matched_off << 3;
	rn_bit = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	saved_t = t;
	if (rn_get_mask(t) == 0) {
		t = t->rn_dupedkey;
	}
	for (; t; t = t->rn_dupedkey) {
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w)) {
				return t;
			}
		} else if (rn_satisfies_leaf(v, t, matched_off, f, w)) {
			return t;
		}
	}
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;
		t = t->rn_parent;
		m = t->rn_mklist;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		while (m) {
			if (m->rm_flags & RNF_NORMAL) {
				if ((rn_bit <= m->rm_bit) &&
				    RN_MATCHF(m->rm_leaf, f, w)) {
					return m->rm_leaf;
				}
			} else {
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(v, t, rm_get_mask(m));
				while (x && rn_get_mask(x) != rm_get_mask(m)) {
					x = x->rn_dupedkey;
				}
				if (x && rn_satisfies_leaf(v, x, off, f, w)) {
					return x;
				}
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return NULL;
}
537
538 #ifdef RN_DEBUG
539 int rn_nodenum;
540 struct radix_node *rn_clist;
541 int rn_saveinfo;
542 int rn_debug = 1;
543 #endif
544
545 static struct radix_node *
rn_newpair(const void * v __sized_by (vlen),uint8_t vlen,int b,struct radix_node nodes[2])546 rn_newpair(const void *v __sized_by(vlen), uint8_t vlen, int b, struct radix_node nodes[2])
547 {
548 struct radix_node *tt = &nodes[0];
549 struct radix_node *t = &nodes[1];
550
551 t->rn_bit = (short)b;
552 t->rn_bmask = 0x80 >> (b & 7);
553 t->rn_left = tt;
554 t->rn_offset = b >> 3;
555 tt->rn_bit = -1;
556 rn_set_key(tt, v, vlen);
557
558 tt->rn_parent = t;
559 tt->rn_flags = t->rn_flags = RNF_ACTIVE;
560 tt->rn_mklist = t->rn_mklist = NULL;
561 #ifdef RN_DEBUG
562 tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
563 tt->rn_twin = t;
564 tt->rn_ybro = rn_clist;
565 rn_clist = tt;
566 #endif
567 return t;
568 }
569
/*
 * rn_insert(v_arg, head, dupentry, nodes):
 *
 * Insert the length-prefixed key `v_arg' into the tree, consuming the
 * caller-supplied node pair `nodes'.  On return *dupentry is non-zero
 * if an equal key already existed, in which case the existing leaf is
 * returned and `nodes' is untouched; otherwise the new leaf is
 * returned.
 */
static struct radix_node *
rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
    struct radix_node nodes[2])
{
	uint8_t vlen;
	caddr_t v = rnarg_unpack(v_arg, &vlen);
	struct radix_node *top = head->rnh_treetop;
	int head_off = top->rn_offset;
	struct radix_node *t = rn_search(v_arg, top);
	caddr_t cp = v + head_off;
	int b;
	struct radix_node *tt;
	uint8_t test_byte;
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	{
		caddr_t cp2 = rn_get_key(t) + head_off;
		int cmp_res;
		caddr_t cplim = v + vlen;

		while (cp < cplim) {
			if (*cp2++ != *cp++) {
				goto on1;
			}
		}
		/* Keys are identical: report a duplicate entry. */
		*dupentry = 1;
		return t;
on1:
		*dupentry = 0;
		/* Locate the first differing bit in the mismatched byte. */
		cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
		for (b = (int)(cp - v) << 3; cmp_res; b--) {
			cmp_res >>= 1;
		}
	}
	{
		struct radix_node *p, *x = top;
		/* Descend until past the level where the keys diverge. */
		do {
			p = x;
			test_byte = rnarg_get(v, vlen, x->rn_offset);
			if (x->rn_bmask & test_byte) {
				x = x->rn_right;
			} else {
				x = x->rn_left;
			}
		} while (b > (unsigned) x->rn_bit);
		/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
		}
#endif
		/* Splice the new interior/leaf pair in between p and x. */
		t = rn_newpair(v, vlen, b, nodes);
		tt = t->rn_left;
		test_byte = rnarg_get(v, vlen, p->rn_offset);
		if ((p->rn_bmask & test_byte) == 0) {
			p->rn_left = t;
		} else {
			p->rn_right = t;
		}
		x->rn_parent = t;
		t->rn_parent = p; /* frees x, p as temp vars below */
		test_byte = rnarg_get(v, vlen, t->rn_offset);
		if ((t->rn_bmask & test_byte) == 0) {
			t->rn_right = x;
		} else {
			t->rn_right = tt;
			t->rn_left = x;
		}
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
		}
#endif
	}
	return tt;
}
647
/*
 * rn_addmask(n_arg, search, skip):
 *
 * Look up (and, unless `search' is set, enter) the netmask `n_arg' in
 * the shared mask tree.  Masks are canonicalized here so that routes
 * with the same bit pattern share a single stored copy.  `skip' is
 * the number of leading bytes (e.g. sockaddr header) to ignore.
 * Returns the mask's radix node, or NULL on lookup failure.
 */
struct radix_node *
rn_addmask(void *n_arg, int search, int skip)
{
	uint8_t mlen0;
	caddr_t netmask = rnarg_unpack(n_arg, &mlen0);
	struct radix_node *x __single;
	struct netmask_rn_entry *nrn_entry;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	uint8_t cmp_len;
	caddr_t key;
	int maskduplicated, m0, isnormal;

	char addmask_key[MAX_KEYLEN] = {0, };

	mlen = min(mlen0, MAX_KEYLEN);
	if (skip == 0) {
		skip = 1;
	}
	if (mlen <= skip) {
		/* Mask has no significant bytes: use the all-zeros node. */
		return mask_rnhead->rnh_nodes;
	}
	if (skip > 1) {
		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
	}
	if ((m0 = mlen) > skip) {
		bcopy(netmask + skip, addmask_key + skip, mlen - skip);
	}
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) {
		cp--;
	}
	mlen = (int)(cp - addmask_key);
	if (mlen <= skip) {
		return mask_rnhead->rnh_nodes;
	}

	/* The canonical mask key is length-prefixed, like all keys. */
	*addmask_key = (char)mlen;
	x = rn_search(addmask_key, rn_masktop);
	key = rn_get_key(x, &cmp_len);
	if (mlen < cmp_len) {
		cmp_len = (int8_t)mlen;
	}
	if (Bcmp(addmask_key, key, cmp_len) != 0) {
		x = NULL;
	}
	if (x || search) {
		return x;
	}
	/* Not found and insertion requested: enter a new mask entry. */
	nrn_entry = kalloc_type(struct netmask_rn_entry, Z_WAITOK_ZERO_NOFAIL);
	netmask = nrn_entry->nrn_netmask;
	Bcopy(addmask_key, netmask, mlen);
	x = rn_insert(netmask, mask_rnhead, &maskduplicated, nrn_entry->nrn_base.rnb_nodes);
	if (maskduplicated) {
		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
		kfree_type(struct netmask_rn_entry, nrn_entry);
		return x;
	}
	mask_rnhead->rnh_cnt++;
	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	cplim = netmask + mlen;
	isnormal = 1;
	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) {
		cp++;
	}
	if (cp != cplim) {
		/* Count leading one-bits in the first non-0xff byte. */
		for (j = 0x80; (j & *cp) != 0; j >>= 1) {
			b++;
		}
		/*
		 * A normal (contiguous) mask must end exactly at this
		 * byte with a left-aligned run of one-bits.
		 */
		if (*cp != normal_chars[b] || cp != (cplim - 1)) {
			isnormal = 0;
		}
	}
	b += (cp - netmask) << 3;
	x->rn_bit = (short)(-1 - b);
	if (isnormal) {
		x->rn_flags |= RNF_NORMAL;
	}
	return x;
}
732
733 static int
734 /* XXX: arbitrary ordering for non-contiguous masks */
rn_lexobetter(void * m_arg,void * n_arg)735 rn_lexobetter(void *m_arg, void *n_arg)
736 {
737 uint8_t mplen, nlen;
738 caddr_t mp = rnarg_unpack(m_arg, &mplen);
739 caddr_t np = rnarg_unpack(n_arg, &nlen);
740
741 if (*mp > *np) {
742 return 1; /* not really, but need to check longer one first */
743 }
744 if (*mp == *np) {
745 for (int i = 1; i < mplen; ++i) {
746 if (mp[i] > np[i]) {
747 return 1;
748 }
749 }
750 }
751 return 0;
752 }
753
754 static struct radix_mask *
rn_new_radix_mask(struct radix_node * tt,struct radix_mask * next)755 rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
756 {
757 struct radix_mask *m;
758
759 MKGet(m);
760 m->rm_bit = tt->rn_bit;
761 m->rm_flags = tt->rn_flags;
762 if (tt->rn_flags & RNF_NORMAL) {
763 m->rm_leaf = tt;
764 } else {
765 rm_set_mask(m, rn_get_mask(tt), rn_get_masklen(tt));
766 }
767 m->rm_mklist = next;
768 tt->rn_mklist = m;
769 return m;
770 }
771
/*
 * rn_addroute(v_arg, n_arg, head, treenodes):
 *
 * Add the route with key `v_arg' and optional netmask `n_arg' to the
 * tree, consuming the caller-supplied node pair `treenodes'.  Returns
 * the leaf for the new route, or NULL if the (key, mask) pair is
 * already present or the mask could not be entered.
 */
struct radix_node *
rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node treenodes[2])
{
	uint8_t vlen, mlen0;
	caddr_t v = rnarg_unpack(v_arg, &vlen);
	caddr_t netmask = rnarg_unpack(n_arg, &mlen0);
	uint8_t mlen = mlen0;
	struct radix_node *t, *x = NULL, *tt;
	struct radix_node *saved_tt, *top = head->rnh_treetop;
	short b = 0, b_leaf = 0;
	int keyduplicated;
	caddr_t mmask;
	struct radix_mask *m, **mp;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) {
			return NULL;
		}
		b_leaf = x->rn_bit;
		b = -1 - x->rn_bit;
		/*
		 * Note: the auxillary mask is stored as a "key".
		 */
		netmask = rn_get_key(x, &mlen);
	}
	/*
	 * Deal with duplicated keys: attach node to previous instance
	 */
	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
	if (keyduplicated) {
		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
			if (rn_get_mask(tt) == netmask) {
				/* Same (key, mask) already present. */
				return NULL;
			}
			if (netmask == 0 ||
			    (rn_get_mask(tt) != NULL &&
			    ((b_leaf < tt->rn_bit) /* index(netmask) > node */
			    || rn_refines(netmask, rn_get_mask(tt))
			    || rn_lexobetter(netmask, rn_get_mask(tt))))) {
				break;
			}
		}
		/*
		 * If the mask is not duplicated, we wouldn't
		 * find it among possible duplicate key entries
		 * anyway, so the above test doesn't hurt.
		 *
		 * We sort the masks for a duplicated key the same way as
		 * in a masklist -- most specific to least specific.
		 * This may require the unfortunate nuisance of relocating
		 * the head of the list.
		 */
		if (tt == saved_tt) {
			struct radix_node *xx = x;
			/* link in at head of list */
			(tt = treenodes)->rn_dupedkey = t;
			tt->rn_flags = t->rn_flags;
			tt->rn_parent = x = t->rn_parent;
			t->rn_parent = tt; /* parent */
			if (x->rn_left == t) {
				x->rn_left = tt;
			} else {
				x->rn_right = tt;
			}
			saved_tt = tt; x = xx;
		} else {
			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
			t->rn_dupedkey = tt;
			tt->rn_parent = t; /* parent */
			if (tt->rn_dupedkey) { /* parent */
				tt->rn_dupedkey->rn_parent = tt; /* parent */
			}
		}
#ifdef RN_DEBUG
		t = tt + 1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
#endif
		rn_set_key(tt, v, vlen);
		tt->rn_bit = -1;
		tt->rn_flags = RNF_ACTIVE;
	}
	head->rnh_cnt++;
	/*
	 * Put mask in tree.
	 */
	if (netmask) {
		rn_set_mask(tt, netmask, mlen);
		tt->rn_bit = x->rn_bit;
		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
	}
	t = saved_tt->rn_parent;
	if (keyduplicated) {
		goto on2;
	}
	b_leaf = -1 - t->rn_bit;
	if (t->rn_right == saved_tt) {
		x = t->rn_left;
	} else {
		x = t->rn_right;
	}
	/* Promote general routes from below */
	if (x->rn_bit < 0) {
		for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) {
			if (rn_get_mask(x) != NULL && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
				*mp = m = rn_new_radix_mask(x, NULL);
				if (m) {
					mp = &m->rm_mklist;
				}
			}
		}
	} else if (x->rn_mklist) {
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
			if (m->rm_bit >= b_leaf) {
				break;
			}
		}
		t->rn_mklist = m; *mp = NULL;
	}
on2:
	/* Add new route to highest possible ancestor's list */
	if ((netmask == 0) || (b > t->rn_bit)) {
		return tt; /* can't lift at all */
	}
	b_leaf = tt->rn_bit;
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m->rm_bit < b_leaf) {
			continue;
		}
		if (m->rm_bit > b_leaf) {
			break;
		}
		if (m->rm_flags & RNF_NORMAL) {
			mmask = rn_get_mask(m->rm_leaf);
			if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR,
				    "Non-unique normal route, mask not entered");
				return tt;
			}
		} else {
			mmask = rm_get_mask(m);
		}
		if (mmask == netmask) {
			/* Share the existing annotation for this mask. */
			m->rm_refs++;
			tt->rn_mklist = m;
			return tt;
		}
		if (rn_refines(netmask, mmask)
		    || rn_lexobetter(netmask, mmask)) {
			break;
		}
	}
	*mp = rn_new_radix_mask(tt, *mp);
	return tt;
}
947
948 struct radix_node *
rn_delete(void * v_arg,void * netmask_arg,struct radix_node_head * head)949 rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
950 {
951 uint8_t vlen, mlen0;
952 caddr_t v = rnarg_unpack(v_arg, &vlen);
953 caddr_t netmask = rnarg_unpack(netmask_arg, &mlen0);
954 uint8_t masklen = mlen0, key_cmp_len, tt_key_len;
955
956 struct radix_node *t __single, *p __single, *x __single, *tt __single;
957 struct radix_mask *m, *saved_m, **mp;
958 struct radix_node *dupedkey, *saved_tt, *top;
959 int b, head_off;
960
961 x = top = head->rnh_treetop;
962 tt = saved_tt = rn_search(v, x);
963
964 /*
965 * Verify that the found node (`tt'), is valid, and that it can
966 * be compared against `v_arg'.
967 */
968 if (tt == NULL) {
969 log(LOG_ERR, "rn_delete: key not found (key_len=%d)\n", vlen);
970 return NULL;
971 }
972
973 head_off = x->rn_offset;
974 tt_key_len = rn_get_keylen(tt);
975 key_cmp_len = (uint8_t)min(vlen, tt_key_len);
976
977 if (key_cmp_len < head_off) {
978 log(LOG_ERR, "rn_delete: key too short (cmp_len=%d, head_offset=%d)\n",
979 key_cmp_len, head_off);
980 return NULL;
981 }
982
983 if (Bcmp(v + head_off, rn_get_key(tt) + head_off, key_cmp_len - head_off)) {
984 log(LOG_ERR, "rn_delete: key mismatch (cmp_len=%d, head_offset=%d)\n",
985 key_cmp_len, head_off);
986 return NULL;
987 }
988
989 /*
990 * Delete our route from mask lists.
991 */
992 if (netmask) {
993 if ((x = rn_addmask(netmask, 1, head_off)) == 0) {
994 return NULL;
995 }
996 netmask = rn_get_key(x, &masklen);
997 while (rn_get_mask(tt) != netmask) {
998 if ((tt = tt->rn_dupedkey) == 0) {
999 return NULL;
1000 }
1001 }
1002 }
1003 if (rn_get_mask(tt) == 0 || (saved_m = m = tt->rn_mklist) == 0) {
1004 goto on1;
1005 }
1006 if (tt->rn_flags & RNF_NORMAL) {
1007 if (m->rm_leaf != tt || m->rm_refs > 0) {
1008 log(LOG_ERR, "rn_delete: inconsistent annotation\n");
1009 return NULL; /* dangling ref could cause disaster */
1010 }
1011 } else {
1012 if (rm_get_mask(m) != rn_get_mask(tt)) {
1013 log(LOG_ERR, "rn_delete: inconsistent annotation\n");
1014 goto on1;
1015 }
1016 if (--m->rm_refs >= 0) {
1017 goto on1;
1018 }
1019 }
1020 b = -1 - tt->rn_bit;
1021 t = saved_tt->rn_parent;
1022 if (b > t->rn_bit) {
1023 goto on1; /* Wasn't lifted at all */
1024 }
1025 do {
1026 x = t;
1027 t = t->rn_parent;
1028 } while (b <= t->rn_bit && x != top);
1029 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
1030 if (m == saved_m) {
1031 *mp = m->rm_mklist;
1032 if (tt->rn_mklist == m) {
1033 tt->rn_mklist = *mp;
1034 }
1035 MKFree(m);
1036 break;
1037 }
1038 }
1039 if (m == 0) {
1040 log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
1041 if (tt->rn_flags & RNF_NORMAL) {
1042 return NULL; /* Dangling ref to us */
1043 }
1044 }
1045 on1:
1046 /*
1047 * Eliminate us from tree
1048 */
1049 if (tt->rn_flags & RNF_ROOT) {
1050 return NULL;
1051 }
1052 head->rnh_cnt--;
1053 #ifdef RN_DEBUG
1054 /* Get us out of the creation list */
1055 for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {
1056 }
1057 if (t) {
1058 t->rn_ybro = tt->rn_ybro;
1059 }
1060 #endif
1061 t = tt->rn_parent;
1062 dupedkey = saved_tt->rn_dupedkey;
1063 if (dupedkey) {
1064 /*
1065 * at this point, tt is the deletion target and saved_tt
1066 * is the head of the dupekey chain
1067 */
1068 if (tt == saved_tt) {
1069 /* remove from head of chain */
1070 x = dupedkey;
1071 x->rn_parent = t;
1072 if (t->rn_left == tt) {
1073 t->rn_left = x;
1074 } else {
1075 t->rn_right = x;
1076 }
1077 } else {
1078 /* find node in front of tt on the chain */
1079 for (x = p = saved_tt; p && p->rn_dupedkey != tt;) {
1080 p = p->rn_dupedkey;
1081 }
1082 if (p) {
1083 p->rn_dupedkey = tt->rn_dupedkey;
1084 if (tt->rn_dupedkey) { /* parent */
1085 tt->rn_dupedkey->rn_parent = p;
1086 }
1087 /* parent */
1088 } else {
1089 log(LOG_ERR, "rn_delete: couldn't find us\n");
1090 }
1091 }
1092 t = rn_lexical_parent(tt);
1093 if (t->rn_flags & RNF_ACTIVE) {
1094 #ifndef RN_DEBUG
1095 x = rn_lexical_parent(x);
1096 *x = *t;
1097 p = t->rn_parent;
1098 #else
1099 b = t->rn_info;
1100 *++x = *t;
1101 t->rn_info = b;
1102 p = t->rn_parent;
1103 #endif
1104 if (p->rn_left == t) {
1105 p->rn_left = x;
1106 } else {
1107 p->rn_right = x;
1108 }
1109 x->rn_left->rn_parent = x;
1110 x->rn_right->rn_parent = x;
1111 }
1112 goto out;
1113 }
1114 if (t->rn_left == tt) {
1115 x = t->rn_right;
1116 } else {
1117 x = t->rn_left;
1118 }
1119 p = t->rn_parent;
1120 if (p->rn_right == t) {
1121 p->rn_right = x;
1122 } else {
1123 p->rn_left = x;
1124 }
1125 x->rn_parent = p;
1126 /*
1127 * Demote routes attached to us.
1128 */
1129 if (t->rn_mklist) {
1130 if (x->rn_bit >= 0) {
1131 for (mp = &x->rn_mklist; (m = *mp);) {
1132 mp = &m->rm_mklist;
1133 }
1134 *mp = t->rn_mklist;
1135 } else {
1136 /* If there are any key,mask pairs in a sibling
1137 * duped-key chain, some subset will appear sorted
1138 * in the same order attached to our mklist */
1139 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) {
1140 if (m == x->rn_mklist) {
1141 struct radix_mask *mm = m->rm_mklist;
1142 x->rn_mklist = NULL;
1143 if (--(m->rm_refs) < 0) {
1144 MKFree(m);
1145 }
1146 m = mm;
1147 }
1148 }
1149 if (m) {
1150 log(LOG_ERR, "rn_delete: Orphaned Mask "
1151 "0x%llx at 0x%llx\n",
1152 (uint64_t)VM_KERNEL_ADDRPERM(m),
1153 (uint64_t)VM_KERNEL_ADDRPERM(x));
1154 }
1155 }
1156 }
1157 /*
1158 * We may be holding an active internal node in the tree.
1159 */
1160 x = rn_lexical_parent(tt);
1161 if (t != x) {
1162 #ifndef RN_DEBUG
1163 *t = *x;
1164 #else
1165 b = t->rn_info;
1166 *t = *x;
1167 t->rn_info = b;
1168 #endif
1169 t->rn_left->rn_parent = t;
1170 t->rn_right->rn_parent = t;
1171 p = x->rn_parent;
1172 if (p->rn_left == x) {
1173 p->rn_left = t;
1174 } else {
1175 p->rn_right = t;
1176 }
1177 }
1178 out:
1179 x = rn_lexical_parent(tt);
1180 x->rn_flags &= ~RNF_ACTIVE;
1181 tt->rn_flags &= ~RNF_ACTIVE;
1182
1183 return tt;
1184 }
1185
1186 /*
1187 * This is the same as rn_walktree() except for the parameters and the
1188 * exit.
1189 */
/*
 * Walk the portion of the radix tree rooted at the node covering
 * address 'a' under mask 'm', applying f(leaf, w) to every non-root
 * leaf (including duped-key siblings).  Returns 0 on a complete walk,
 * or the first non-zero value returned by 'f', which aborts the walk.
 *
 * 'a' and 'm' are packed key arguments decoded via rnarg_unpack();
 * NOTE(review): assumes both unpack to sockaddr-style byte strings in
 * the same address family as the tree — confirm against callers.
 */
static int
rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f,
    void *w)
{
	int error;
	uint8_t alen, mlen;
	struct radix_node *base, *next;
	caddr_t xa = rnarg_unpack(a, &alen);
	caddr_t xm = rnarg_unpack(m, &mlen);
	struct radix_node *rn, *last;
	int stopping;
	int lastb;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node.  When that happens, we would end up referring
	 * to an already-freed radix node as the successor node.  To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	last = NULL;
	stopping = 0;
	rnh_cnt = h->rnh_cnt;	/* snapshot; a decrease signals a deletion */

	/*
	 * rn_search_m is sort-of-open-coded here.  Descend as long as the
	 * mask still has the decision bit set, remembering the last
	 * internal node visited; that node roots the subtree to walk.
	 */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0;) {
		last = rn;
		uint8_t test_byte;
		test_byte = rnarg_get(xm, mlen, rn->rn_offset);
		if (!(rn->rn_bmask & test_byte)) {
			/* mask ran out of significant bits at this depth */
			break;
		}
		test_byte = rnarg_get(xa, alen, rn->rn_offset);
		if (rn->rn_bmask & test_byte) {
			rn = rn->rn_right;
		} else {
			rn = rn->rn_left;
		}
	}

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the last node we looked at.
	 * Either way, last is the node we want to start from.
	 */
	rn = last;
	lastb = rn->rn_bit;	/* walk stops when we climb above this bit */

	/* First time through node, go left */
	while (rn->rn_bit >= 0) {
		rn = rn->rn_left;
	}

	while (!stopping) {
		base = rn;	/* head of the duped-key chain to process */
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		    && !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit <= lastb) {
				stopping = 1;
				/*
				 * XXX we should jump to the 'Process leaves'
				 * part, because the values of 'rn' and 'next'
				 * we compute will not be used. Not a big deal
				 * because this loop will terminate, but it is
				 * inefficient and hard to understand!
				 */
			}
		}

		/*
		 * The following code (bug fix) inherited from FreeBSD is
		 * currently disabled, because our implementation uses the
		 * RTF_PRCLONING scheme that has been abandoned in current
		 * FreeBSD release.  The scheme involves setting such a flag
		 * for the default route entry, and therefore all off-link
		 * destinations would become clones of that entry.  Enabling
		 * the following code would be problematic at this point,
		 * because the removal of default route would cause only
		 * the left-half of the tree to be traversed, leaving the
		 * right-half untouched.  If there are clones of the entry
		 * that reside in that right-half, they would not be deleted
		 * and would linger around until they expire or explicitly
		 * deleted, which is a very bad thing.
		 *
		 * This code should be uncommented only after we get rid
		 * of the RTF_PRCLONING scheme.
		 */
#if 0
		/*
		 * At the top of the tree, no need to traverse the right
		 * half, prevent the traversal of the entire tree in the
		 * case of default route.
		 */
		if (rn->rn_parent->rn_flags & RNF_ROOT) {
			stopping = 1;
		}
#endif

		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) {
			rn = rn->rn_left;
		}
		next = rn;
		/* Process leaves: f may delete rn, so advance base first */
		while ((rn = base) != 0) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w))) {
				return error;
			}
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt) {
			goto restart;
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT) {
			stopping = 1;
		}
	}
	return 0;
}
1325
1326 static int
rn_walktree(struct radix_node_head * h,walktree_f_t * f,void * w)1327 rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
1328 {
1329 int error;
1330 struct radix_node *base, *next;
1331 struct radix_node *rn;
1332 int rnh_cnt;
1333
1334 /*
1335 * This gets complicated because we may delete the node while
1336 * applying the function f to it; we cannot simply use the next
1337 * leaf as the successor node in advance, because that leaf may
1338 * be removed as well during deletion when it is a clone of the
1339 * current node. When that happens, we would end up referring
1340 * to an already-freed radix node as the successor node. To get
1341 * around this issue, if we detect that the radix tree has changed
1342 * in dimension (smaller than before), we simply restart the walk
1343 * from the top of tree.
1344 */
1345 restart:
1346 rn = h->rnh_treetop;
1347 rnh_cnt = h->rnh_cnt;
1348
1349 /* First time through node, go left */
1350 while (rn->rn_bit >= 0) {
1351 rn = rn->rn_left;
1352 }
1353 for (;;) {
1354 base = rn;
1355 /* If at right child go back up, otherwise, go right */
1356 while (rn->rn_parent->rn_right == rn &&
1357 (rn->rn_flags & RNF_ROOT) == 0) {
1358 rn = rn->rn_parent;
1359 }
1360 /* Find the next *leaf* to start from */
1361 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) {
1362 rn = rn->rn_left;
1363 }
1364 next = rn;
1365 /* Process leaves */
1366 while ((rn = base) != NULL) {
1367 base = rn->rn_dupedkey;
1368 if (!(rn->rn_flags & RNF_ROOT)
1369 && (error = (*f)(rn, w))) {
1370 return error;
1371 }
1372 }
1373 /* If one or more nodes got deleted, restart from top */
1374 if (h->rnh_cnt < rnh_cnt) {
1375 goto restart;
1376 }
1377 rn = next;
1378 if (rn->rn_flags & RNF_ROOT) {
1379 return 0;
1380 }
1381 }
1382 /* NOTREACHED */
1383 }
1384
1385 int
rn_inithead(void ** head,int off)1386 rn_inithead(void **head, int off)
1387 {
1388 struct radix_node_head *rnh;
1389 struct radix_node *t, *tt, *ttt;
1390 if (off > INT8_MAX) {
1391 return 0;
1392 }
1393 if (*head) {
1394 return 1;
1395 }
1396
1397 rnh = zalloc_flags(radix_node_head_zone, Z_WAITOK_ZERO_NOFAIL);
1398 *head = rnh;
1399 t = rn_newpair(rn_zeros, (int8_t)MAX_KEYLEN, off, rnh->rnh_nodes);
1400 ttt = rnh->rnh_nodes + 2;
1401 t->rn_right = ttt;
1402 t->rn_parent = t;
1403 tt = t->rn_left;
1404 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
1405 tt->rn_bit = (short)(-1 - off);
1406 *ttt = *tt;
1407 rn_set_key(ttt, rn_ones, (int8_t)MAX_KEYLEN);
1408 rnh->rnh_addaddr = rn_addroute;
1409 rnh->rnh_deladdr = rn_delete;
1410 rnh->rnh_matchaddr = rn_match;
1411 rnh->rnh_matchaddr_args = rn_match_args;
1412 rnh->rnh_lookup = rn_lookup;
1413 rnh->rnh_lookup_args = rn_lookup_args;
1414 rnh->rnh_walktree = rn_walktree;
1415 rnh->rnh_walktree_from = rn_walktree_from;
1416 rnh->rnh_treetop = t;
1417 rnh->rnh_cnt = 3;
1418 return 1;
1419 }
1420
1421 void
rn_init(void)1422 rn_init(void)
1423 {
1424 struct domain *dom;
1425
1426 /*
1427 * Validate that no domain has max key that exceeds the MAX_KEYLEN constant.
1428 * This is really not expected to happen unless we introduce a new domain.
1429 * In such case, the MAX_KEYLEN constant will need to be updated,
1430 * along with the layout of `struct rn_base_entry'.
1431 *
1432 * N.B. lock already held when rn_init is called.
1433 */
1434 TAILQ_FOREACH(dom, &domains, dom_entry) {
1435 if (MAX_KEYLEN < dom->dom_maxrtkey) {
1436 log(LOG_ERR, "rn_init: encountered domain %s with max key len %d exceeding the limit %d",
1437 dom->dom_name,
1438 dom->dom_maxrtkey,
1439 MAX_KEYLEN);
1440 return;
1441 }
1442 }
1443
1444 if (rn_inithead((void **)&mask_rnhead, 0) == 0) {
1445 panic("rn_init 2");
1446 }
1447 }
1448