xref: /xnu-11417.140.69/bsd/skywalk/namespace/flowidns.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * The flowidns (Flow ID namespace) module provides functionality to allocate
31  * globally unique identifier for a flow.
32  * Currently we have four modules (flowswitch, inpcb, PF & IPSec driver) in our
33  * stack which need to generate flow identifiers. These modules stamp every
34  * outgoing packet with a flowID. This flowID can be used by other upstream
35  * components in the device for flow classification purpose. For example, the
36  * FQ-Codel algorithm relies on this per packet flowID to avoid parsing every
37  * packet header for flow classification. A globally unique flowID can also be
38  * used by the networking feature offload engines operating at link layer to
39  * avoid flow classification operations.
40  * For performance reasons we use the concept of a flow domain and the
41  * data structures used by the flowidns module have per domain instance.
42  * These domains represent the above mentioned four modules generating the
43  * flowID. This allows us to avoid global lock being used while allocating &
44  * releasing flowID. FlowID is a 32-bit unsigned integer and the 2 most
45  * significant bits of flowID are used to encode the domain ID. This
46  * encoding also means that the flowID generator only needs to ensure
47  * uniqueness of identifier within a domain.
48  */
49 
50 #include <skywalk/os_skywalk.h>
51 #include <skywalk/os_skywalk_private.h>
52 #include <skywalk/namespace/flowidns.h>
53 #include <dev/random/randomdev.h>
54 #include <sys/sdt.h>
55 
56 /* maximum number of flowID generation retries in case of collision */
57 #define FLOWIDNS_MAX_FLOWID_GEN_RETRY  5
58 
59 /* 2 most significant bits of the flowID are used to encode the flow domain */
60 #define FLOWIDNS_FLOWID_DOMAIN_SHIFT   30
61 #define FLOWIDNS_FLOWID_DOMAIN_MASK    (0x03 << FLOWIDNS_FLOWID_DOMAIN_SHIFT)
62 
63 #define FLOWIDNS_FLOWID_SET_DOMAIN(_dom, _fid)    do {         \
64 	(_fid) &= ~FLOWIDNS_FLOWID_DOMAIN_MASK;                \
65 	(_fid) |= ((_dom) << FLOWIDNS_FLOWID_DOMAIN_SHIFT);    \
66 } while (0)
67 
68 #define FLOWIDNS_FLOWID_GET_DOMAIN(_dom, _fid)    do {    \
69 	(_dom) = (_fid) >> FLOWIDNS_FLOWID_DOMAIN_SHIFT;  \
70 } while (0)
71 
72 #define FLOWIDNS_DOM_LOCK(_dom)    \
73 	lck_mtx_lock(&(flowidns_domain_array[(_dom)].fd_mtx))
74 #define FLOWIDNS_DOM_UNLOCK(_dom)    \
75 	lck_mtx_unlock(&(flowidns_domain_array[(_dom)].fd_mtx))
76 
77 struct flowidns_flowid_tree_node {
78 	RB_ENTRY(flowidns_flowid_tree_node) fftn_link;
79 	struct flowidns_flow_key            fftn_flowkey;
80 	flowidns_flowid_t                   fftn_flowid;
81 };
82 
83 static LCK_GRP_DECLARE(flowidns_lock_group, "flowidns_lock");
84 static int __flowidns_inited = 0;
85 
86 static SKMEM_TYPE_DEFINE(flowidns_fftn_zone, struct flowidns_flowid_tree_node);
87 
88 __attribute__((always_inline))
89 static inline int
fftn_cmp(const struct flowidns_flowid_tree_node * fftn1,const struct flowidns_flowid_tree_node * fftn2)90 fftn_cmp(const struct flowidns_flowid_tree_node *fftn1,
91     const struct flowidns_flowid_tree_node *fftn2)
92 {
93 	return (signed)(fftn1->fftn_flowid - fftn2->fftn_flowid);
94 }
95 
96 RB_HEAD(flowidns_flowid_tree, flowidns_flowid_tree_node);
97 RB_PROTOTYPE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link,
98     fftn_cmp);
99 RB_GENERATE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link,
100     fftn_cmp);
101 
102 struct flowidns_domain {
103 	decl_lck_mtx_data(, fd_mtx);
104 	struct flowidns_flowid_tree    fd_flowid_tree;
105 	uint32_t                       fd_id;
106 	uint64_t                       fd_nallocs;
107 	uint64_t                       fd_nreleases;
108 	uint64_t                       fd_ncollisions;
109 };
110 
111 static struct flowidns_domain flowidns_domain_array[FLOWIDNS_DOMAIN_MAX + 1];
112 
113 static struct flowidns_flowid_tree_node *
flowidns_fftn_alloc(bool can_block)114 flowidns_fftn_alloc(bool can_block)
115 {
116 	struct flowidns_flowid_tree_node *fftn = NULL;
117 	zalloc_flags_t zflags;
118 
119 	zflags = can_block ? Z_WAITOK_ZERO : Z_NOWAIT_ZERO;
120 	fftn = zalloc_flags(flowidns_fftn_zone, zflags);
121 	return fftn;
122 }
123 
124 static void
flowidns_fftn_free(struct flowidns_flowid_tree_node * fftn)125 flowidns_fftn_free(struct flowidns_flowid_tree_node *fftn)
126 {
127 	zfree(flowidns_fftn_zone, fftn);
128 }
129 
130 static struct flowidns_flowid_tree_node *
flowidns_find_fftn(flowidns_flowid_t flowid,flowidns_domain_id_t domain)131 flowidns_find_fftn(flowidns_flowid_t flowid, flowidns_domain_id_t domain)
132 {
133 	struct flowidns_flowid_tree_node find = { .fftn_flowid = flowid };
134 
135 	return RB_FIND(flowidns_flowid_tree,
136 	           &(flowidns_domain_array[domain].fd_flowid_tree), &find);
137 }
138 
139 void
flowidns_allocate_flowid(flowidns_domain_id_t domain,struct flowidns_flow_key * pflow_key,flowidns_flowid_t * pflowid)140 flowidns_allocate_flowid(flowidns_domain_id_t domain,
141     struct flowidns_flow_key *pflow_key, flowidns_flowid_t *pflowid)
142 {
143 	struct flowidns_flowid_tree_node *fftn = NULL, *dup = NULL;
144 	uint32_t flowid = 0;
145 	int retry_cnt = 0;
146 
147 	VERIFY(__flowidns_inited == 1);
148 	VERIFY(pflowid != NULL);
149 	VERIFY(pflow_key != NULL);
150 	VERIFY(domain >= FLOWIDNS_DOMAIN_MIN &&
151 	    domain <= FLOWIDNS_DOMAIN_MAX);
152 
153 	FLOWIDNS_DOM_LOCK(domain);
154 
155 	fftn = flowidns_fftn_alloc(true);
156 	if (__improbable(fftn == NULL)) {
157 		panic_plain("failed to allocate flowid node\n");
158 	}
159 retry:
160 	/* try to get a non-zero flow identifier */
161 	do {
162 		read_frandom(&flowid, sizeof(flowid));
163 	} while (__improbable(flowid == 0));
164 
165 	FLOWIDNS_FLOWID_SET_DOMAIN(domain, flowid);
166 
167 	fftn->fftn_flowid = flowid;
168 	fftn->fftn_flowkey = *pflow_key;
169 	dup = RB_INSERT(flowidns_flowid_tree,
170 	    &(flowidns_domain_array[domain].fd_flowid_tree), fftn);
171 
172 	/* try to get a unique flow identifier */
173 	if (dup != NULL) {
174 		retry_cnt++;
175 		flowidns_domain_array[domain].fd_ncollisions++;
176 		SK_ERR("duplicate flowid 0x%x generated, retrying %d",
177 		    flowid, retry_cnt);
178 		/*
179 		 * safeguard to check if we need a better hash strategy.
180 		 */
181 		VERIFY(retry_cnt <= FLOWIDNS_MAX_FLOWID_GEN_RETRY);
182 		goto retry;
183 	}
184 	*pflowid = flowid;
185 	flowidns_domain_array[domain].fd_nallocs++;
186 	VERIFY(flowidns_domain_array[domain].fd_nallocs != 0);
187 
188 	FLOWIDNS_DOM_UNLOCK(domain);
189 
190 	DTRACE_SKYWALK2(fidalloc, uint32_t, domain, uint32_t, flowid);
191 }
192 
193 void
flowidns_release_flowid(flowidns_flowid_t flowid)194 flowidns_release_flowid(flowidns_flowid_t flowid)
195 {
196 	struct flowidns_flowid_tree_node *fftn;
197 	flowidns_domain_id_t domain;
198 
199 	VERIFY(__flowidns_inited == 1);
200 	VERIFY(flowid != 0);
201 
202 	FLOWIDNS_FLOWID_GET_DOMAIN(domain, flowid);
203 	VERIFY(domain >= FLOWIDNS_DOMAIN_MIN &&
204 	    domain <= FLOWIDNS_DOMAIN_MAX);
205 
206 	DTRACE_SKYWALK2(fidrel, uint32_t, domain, uint32_t, flowid);
207 
208 	FLOWIDNS_DOM_LOCK(domain);
209 
210 	fftn = flowidns_find_fftn(flowid, domain);
211 	if (fftn == NULL) {
212 		panic_plain("flowid 0x%x not found in domain %d\n", flowid,
213 		    domain);
214 	}
215 	RB_REMOVE(flowidns_flowid_tree,
216 	    &(flowidns_domain_array[domain].fd_flowid_tree), fftn);
217 	ASSERT(fftn->fftn_flowid == flowid);
218 	flowidns_fftn_free(fftn);
219 	flowidns_domain_array[domain].fd_nreleases++;
220 	VERIFY(flowidns_domain_array[domain].fd_nreleases != 0);
221 
222 	FLOWIDNS_DOM_UNLOCK(domain);
223 }
224 
225 int
flowidns_init()226 flowidns_init()
227 {
228 	flowidns_domain_id_t domain;
229 
230 	VERIFY(__flowidns_inited == 0);
231 	_CASSERT(SFH_DOMAIN_IPSEC == FLOWIDNS_DOMAIN_IPSEC);
232 	_CASSERT(SFH_DOMAIN_FLOWSWITCH == FLOWIDNS_DOMAIN_FLOWSWITCH);
233 	_CASSERT(SFH_DOMAIN_INPCB == FLOWIDNS_DOMAIN_INPCB);
234 	_CASSERT(SFH_DOMAIN_PF == FLOWIDNS_DOMAIN_PF);
235 	_CASSERT(FLOWIDNS_DOMAIN_MIN == 0);
236 	/*
237 	 * FLOWIDNS_FLOWID_DOMAIN_{MASK, SHIFT} macros are based on below
238 	 * assumption.
239 	 */
240 	_CASSERT(FLOWIDNS_DOMAIN_MAX == 3);
241 
242 	for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
243 	    domain++) {
244 		bzero(&flowidns_domain_array[domain],
245 		    sizeof(struct flowidns_domain));
246 		flowidns_domain_array[domain].fd_id = domain;
247 		lck_mtx_init(&(flowidns_domain_array[domain].fd_mtx),
248 		    &flowidns_lock_group, NULL);
249 		RB_INIT(&(flowidns_domain_array[domain].fd_flowid_tree));
250 	}
251 
252 	__flowidns_inited = 1;
253 	SK_D("initialized flow ID namespace");
254 	return 0;
255 }
256 
257 void
flowidns_fini(void)258 flowidns_fini(void)
259 {
260 	flowidns_domain_id_t domain;
261 	struct flowidns_flowid_tree_node *fftn, *fftn_tmp;
262 
263 	VERIFY(__flowidns_inited == 1);
264 
265 	for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
266 	    domain++) {
267 		FLOWIDNS_DOM_LOCK(domain);
268 
269 		RB_FOREACH_SAFE(fftn, flowidns_flowid_tree,
270 		    &(flowidns_domain_array[domain].fd_flowid_tree),
271 		    fftn_tmp) {
272 			RB_REMOVE(flowidns_flowid_tree,
273 			    &(flowidns_domain_array[domain].fd_flowid_tree),
274 			    fftn);
275 			flowidns_fftn_free(fftn);
276 		}
277 
278 		FLOWIDNS_DOM_UNLOCK(domain);
279 
280 		lck_mtx_destroy(&(flowidns_domain_array[domain].fd_mtx),
281 		    &flowidns_lock_group);
282 	}
283 
284 	__flowidns_inited = 0;
285 }
286 
287 static int flowidns_stats_sysctl SYSCTL_HANDLER_ARGS;
288 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flowidns,
289     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
290     0, 0, flowidns_stats_sysctl, "-",
291     "flowid allocations (struct sk_stats_flowidns_header, "
292     "skywalk/os_stats_private.h)");
293 
294 static int
flowidns_dump_domain(struct sysctl_req * req,struct flowidns_domain * domain)295 flowidns_dump_domain(struct sysctl_req *req, struct flowidns_domain *domain)
296 {
297 	struct flowidns_flowid_tree_node *fftn;
298 	struct sk_stats_flowidns_header header;
299 	struct sk_stats_flowidns_record record;
300 	uint64_t n_records;
301 	int err;
302 
303 	/* Fill out header */
304 	memset(&header, 0, sizeof(header));
305 	header.sfh_domain = domain->fd_id;
306 	header.sfh_nallocs = domain->fd_nallocs;
307 	header.sfh_nreleases = domain->fd_nreleases;
308 	header.sfh_ncollisions = domain->fd_ncollisions;
309 	n_records = domain->fd_nallocs - domain->fd_nreleases;
310 	VERIFY(n_records <= UINT32_MAX);
311 	header.sfh_nrecords = (uint32_t)n_records;
312 
313 	err = SYSCTL_OUT(req, &header, sizeof(header));
314 	if (err) {
315 		return err;
316 	}
317 
318 	/* Fill out records */
319 	RB_FOREACH(fftn, flowidns_flowid_tree, &domain->fd_flowid_tree) {
320 		VERIFY(n_records > 0);
321 		n_records--;
322 		bzero(&record, sizeof(record));
323 		record.sfr_flowid = fftn->fftn_flowid;
324 		record.sfr_af = fftn->fftn_flowkey.ffk_af;
325 		record.sfr_ipproto = fftn->fftn_flowkey.ffk_proto;
326 		record.sfr_protoid = fftn->fftn_flowkey.ffk_protoid;
327 		_CASSERT(sizeof(fftn->fftn_flowkey.ffk_laddr) ==
328 		    sizeof(record.sfr_laddr));
329 		_CASSERT(sizeof(fftn->fftn_flowkey.ffk_raddr) ==
330 		    sizeof(record.sfr_raddr));
331 		bcopy(&(fftn->fftn_flowkey.ffk_laddr), &record.sfr_laddr,
332 		    sizeof(record.sfr_laddr));
333 		bcopy(&(fftn->fftn_flowkey.ffk_raddr), &record.sfr_raddr,
334 		    sizeof(record.sfr_raddr));
335 
336 		err = SYSCTL_OUT(req, &record, sizeof(record));
337 		if (err) {
338 			return err;
339 		}
340 	}
341 	VERIFY(n_records == 0);
342 	return 0;
343 }
344 
345 static int
346 flowidns_stats_sysctl SYSCTL_HANDLER_ARGS
347 {
348 #pragma unused(oidp, arg1, arg2)
349 	flowidns_domain_id_t domain;
350 	int err = 0;
351 
352 	if (!kauth_cred_issuser(kauth_cred_get())) {
353 		return EPERM;
354 	}
355 
356 	if (__flowidns_inited == 0) {
357 		return ENOTSUP;
358 	}
359 
360 	net_update_uptime();
361 
362 	for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
363 	    domain++) {
364 		FLOWIDNS_DOM_LOCK(domain);
365 		err = flowidns_dump_domain(req, &flowidns_domain_array[domain]);
366 		FLOWIDNS_DOM_UNLOCK(domain);
367 		if (err != 0) {
368 			return err;
369 		}
370 	}
371 	/*
372 	 * If this is just a request for length, add slop because
373 	 * this is dynamically changing data
374 	 */
375 	if (req->oldptr == USER_ADDR_NULL) {
376 		req->oldidx += 20 * sizeof(struct sk_stats_flowidns_record);
377 	}
378 	return err;
379 }
380