xref: /xnu-8792.61.2/bsd/skywalk/namespace/flowidns.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
/*
 * The flowidns (Flow ID namespace) module provides functionality to allocate
 * a globally unique identifier for a flow.
 * Currently we have four modules (flowswitch, inpcb, PF & IPSec driver) in our
 * stack which need to generate flow identifiers. These modules stamp every
 * outgoing packet with a flowID. This flowID can be used by other upstream
 * components in the device for flow classification purposes. For example, the
 * FQ-Codel algorithm relies on this per-packet flowID to avoid parsing every
 * packet header for flow classification. A globally unique flowID can also be
 * used by the networking feature offload engines operating at the link layer
 * to avoid flow classification operations.
 * For performance reasons we use the concept of a flow domain, and the
 * data structures used by the flowidns module have a per-domain instance.
 * These domains represent the above-mentioned four modules generating the
 * flowID. This allows us to avoid a global lock being used while allocating &
 * releasing a flowID. A flowID is a 32-bit unsigned integer and the 2 most
 * significant bits of the flowID are used to encode the domain ID. This
 * encoding also means that the flowID generator only needs to ensure
 * uniqueness of the identifier within a domain.
 */
49 
50 #include <skywalk/os_skywalk.h>
51 #include <skywalk/os_skywalk_private.h>
52 #include <skywalk/namespace/flowidns.h>
53 #include <dev/random/randomdev.h>
54 #include <sys/sdt.h>
55 
/* maximum number of flowID generation retries in case of collision */
#define FLOWIDNS_MAX_FLOWID_GEN_RETRY  5

/*
 * 2 most significant bits of the flowID are used to encode the flow domain.
 * The mask is built from an unsigned constant: shifting a signed 0x03 left
 * by 30 would move a set bit into the sign bit of an int.
 */
#define FLOWIDNS_FLOWID_DOMAIN_SHIFT   30
#define FLOWIDNS_FLOWID_DOMAIN_MASK    (0x03U << FLOWIDNS_FLOWID_DOMAIN_SHIFT)

/*
 * Overwrite the domain bits of _fid (an lvalue) with _dom, leaving the lower
 * 30 bits untouched. _dom is widened to uint32_t before shifting so that a
 * narrow (int-promoted) domain type is never shifted as a signed value.
 */
#define FLOWIDNS_FLOWID_SET_DOMAIN(_dom, _fid)    do {                   \
	(_fid) &= ~FLOWIDNS_FLOWID_DOMAIN_MASK;                          \
	(_fid) |= ((uint32_t)(_dom) << FLOWIDNS_FLOWID_DOMAIN_SHIFT);    \
} while (0)

/* Store the domain encoded in the top bits of _fid into _dom (an lvalue). */
#define FLOWIDNS_FLOWID_GET_DOMAIN(_dom, _fid)    do {    \
	(_dom) = (_fid) >> FLOWIDNS_FLOWID_DOMAIN_SHIFT;  \
} while (0)

/* Acquire / release the mutex of the domain with index _dom. */
#define FLOWIDNS_DOM_LOCK(_dom)    \
	lck_mtx_lock(&(flowidns_domain_array[(_dom)].fd_mtx))
#define FLOWIDNS_DOM_UNLOCK(_dom)    \
	lck_mtx_unlock(&(flowidns_domain_array[(_dom)].fd_mtx))
76 
77 struct flowidns_flowid_tree_node {
78 	RB_ENTRY(flowidns_flowid_tree_node) fftn_link;
79 	struct flowidns_flow_key            fftn_flowkey;
80 	flowidns_flowid_t                   fftn_flowid;
81 };
82 
83 static LCK_GRP_DECLARE(flowidns_lock_group, "flowidns_lock");
84 static int __flowidns_inited = 0;
85 
86 static ZONE_DEFINE(flowidns_fftn_zone, SKMEM_ZONE_PREFIX ".flowidns.fftn",
87     sizeof(struct flowidns_flowid_tree_node), ZC_NONE);
88 
89 __attribute__((always_inline))
90 static inline int
fftn_cmp(const struct flowidns_flowid_tree_node * fftn1,const struct flowidns_flowid_tree_node * fftn2)91 fftn_cmp(const struct flowidns_flowid_tree_node *fftn1,
92     const struct flowidns_flowid_tree_node *fftn2)
93 {
94 	return (signed)(fftn1->fftn_flowid - fftn2->fftn_flowid);
95 }
96 
/*
 * Red-black tree type holding the allocated flow IDs of one domain.
 * Nodes are linked through fftn_link and ordered by fftn_cmp().
 */
RB_HEAD(flowidns_flowid_tree, flowidns_flowid_tree_node);
RB_PROTOTYPE(flowidns_flowid_tree, flowidns_flowid_tree_node,
    fftn_link, fftn_cmp);
RB_GENERATE(flowidns_flowid_tree, flowidns_flowid_tree_node,
    fftn_link, fftn_cmp);
102 
103 struct flowidns_domain {
104 	decl_lck_mtx_data(, fd_mtx);
105 	struct flowidns_flowid_tree    fd_flowid_tree;
106 	uint32_t                       fd_id;
107 	uint64_t                       fd_nallocs;
108 	uint64_t                       fd_nreleases;
109 	uint64_t                       fd_ncollisions;
110 };
111 
112 static struct flowidns_domain flowidns_domain_array[FLOWIDNS_DOMAIN_MAX + 1];
113 
114 static struct flowidns_flowid_tree_node *
flowidns_fftn_alloc(bool can_block)115 flowidns_fftn_alloc(bool can_block)
116 {
117 	struct flowidns_flowid_tree_node *fftn = NULL;
118 	zalloc_flags_t zflags;
119 
120 	zflags = can_block ? Z_WAITOK_ZERO : Z_NOWAIT_ZERO;
121 	fftn = zalloc_flags(flowidns_fftn_zone, zflags);
122 	return fftn;
123 }
124 
125 static void
flowidns_fftn_free(struct flowidns_flowid_tree_node * fftn)126 flowidns_fftn_free(struct flowidns_flowid_tree_node *fftn)
127 {
128 	zfree(flowidns_fftn_zone, fftn);
129 }
130 
131 static struct flowidns_flowid_tree_node *
flowidns_find_fftn(flowidns_flowid_t flowid,flowidns_domain_id_t domain)132 flowidns_find_fftn(flowidns_flowid_t flowid, flowidns_domain_id_t domain)
133 {
134 	struct flowidns_flowid_tree_node find = { .fftn_flowid = flowid };
135 
136 	return RB_FIND(flowidns_flowid_tree,
137 	           &(flowidns_domain_array[domain].fd_flowid_tree), &find);
138 }
139 
140 void
flowidns_allocate_flowid(flowidns_domain_id_t domain,struct flowidns_flow_key * pflow_key,flowidns_flowid_t * pflowid)141 flowidns_allocate_flowid(flowidns_domain_id_t domain,
142     struct flowidns_flow_key *pflow_key, flowidns_flowid_t *pflowid)
143 {
144 	struct flowidns_flowid_tree_node *fftn = NULL, *dup = NULL;
145 	uint32_t flowid = 0;
146 	int retry_cnt = 0;
147 
148 	VERIFY(__flowidns_inited == 1);
149 	VERIFY(pflowid != NULL);
150 	VERIFY(pflow_key != NULL);
151 	VERIFY(domain >= FLOWIDNS_DOMAIN_MIN &&
152 	    domain <= FLOWIDNS_DOMAIN_MAX);
153 
154 	FLOWIDNS_DOM_LOCK(domain);
155 
156 	fftn = flowidns_fftn_alloc(true);
157 	if (__improbable(fftn == NULL)) {
158 		panic_plain("failed to allocate flowid node\n");
159 	}
160 retry:
161 	/* try to get a non-zero flow identifier */
162 	do {
163 		read_frandom(&flowid, sizeof(flowid));
164 	} while (__improbable(flowid == 0));
165 
166 	FLOWIDNS_FLOWID_SET_DOMAIN(domain, flowid);
167 
168 	fftn->fftn_flowid = flowid;
169 	fftn->fftn_flowkey = *pflow_key;
170 	dup = RB_INSERT(flowidns_flowid_tree,
171 	    &(flowidns_domain_array[domain].fd_flowid_tree), fftn);
172 
173 	/* try to get a unique flow identifier */
174 	if (dup != NULL) {
175 		retry_cnt++;
176 		flowidns_domain_array[domain].fd_ncollisions++;
177 		SK_ERR("duplicate flowid 0x%x generated, retrying %d",
178 		    flowid, retry_cnt);
179 		/*
180 		 * safeguard to check if we need a better hash strategy.
181 		 */
182 		VERIFY(retry_cnt <= FLOWIDNS_MAX_FLOWID_GEN_RETRY);
183 		goto retry;
184 	}
185 	*pflowid = flowid;
186 	flowidns_domain_array[domain].fd_nallocs++;
187 	VERIFY(flowidns_domain_array[domain].fd_nallocs != 0);
188 
189 	FLOWIDNS_DOM_UNLOCK(domain);
190 
191 	DTRACE_SKYWALK2(fidalloc, uint32_t, domain, uint32_t, flowid);
192 }
193 
194 void
flowidns_release_flowid(flowidns_flowid_t flowid)195 flowidns_release_flowid(flowidns_flowid_t flowid)
196 {
197 	struct flowidns_flowid_tree_node *fftn;
198 	flowidns_domain_id_t domain;
199 
200 	VERIFY(__flowidns_inited == 1);
201 	VERIFY(flowid != 0);
202 
203 	FLOWIDNS_FLOWID_GET_DOMAIN(domain, flowid);
204 	VERIFY(domain >= FLOWIDNS_DOMAIN_MIN &&
205 	    domain <= FLOWIDNS_DOMAIN_MAX);
206 
207 	DTRACE_SKYWALK2(fidrel, uint32_t, domain, uint32_t, flowid);
208 
209 	FLOWIDNS_DOM_LOCK(domain);
210 
211 	fftn = flowidns_find_fftn(flowid, domain);
212 	if (fftn == NULL) {
213 		panic_plain("flowid 0x%x not found in domain %d\n", flowid,
214 		    domain);
215 	}
216 	RB_REMOVE(flowidns_flowid_tree,
217 	    &(flowidns_domain_array[domain].fd_flowid_tree), fftn);
218 	ASSERT(fftn->fftn_flowid == flowid);
219 	flowidns_fftn_free(fftn);
220 	flowidns_domain_array[domain].fd_nreleases++;
221 	VERIFY(flowidns_domain_array[domain].fd_nreleases != 0);
222 
223 	FLOWIDNS_DOM_UNLOCK(domain);
224 }
225 
226 int
flowidns_init()227 flowidns_init()
228 {
229 	flowidns_domain_id_t domain;
230 
231 	VERIFY(__flowidns_inited == 0);
232 	_CASSERT(SFH_DOMAIN_IPSEC == FLOWIDNS_DOMAIN_IPSEC);
233 	_CASSERT(SFH_DOMAIN_FLOWSWITCH == FLOWIDNS_DOMAIN_FLOWSWITCH);
234 	_CASSERT(SFH_DOMAIN_INPCB == FLOWIDNS_DOMAIN_INPCB);
235 	_CASSERT(SFH_DOMAIN_PF == FLOWIDNS_DOMAIN_PF);
236 	_CASSERT(FLOWIDNS_DOMAIN_MIN == 0);
237 	/*
238 	 * FLOWIDNS_FLOWID_DOMAIN_{MASK, SHIFT} macros are based on below
239 	 * assumption.
240 	 */
241 	_CASSERT(FLOWIDNS_DOMAIN_MAX == 3);
242 
243 	for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
244 	    domain++) {
245 		bzero(&flowidns_domain_array[domain],
246 		    sizeof(struct flowidns_domain));
247 		flowidns_domain_array[domain].fd_id = domain;
248 		lck_mtx_init(&(flowidns_domain_array[domain].fd_mtx),
249 		    &flowidns_lock_group, NULL);
250 		RB_INIT(&(flowidns_domain_array[domain].fd_flowid_tree));
251 	}
252 
253 	__flowidns_inited = 1;
254 	SK_D("initialized flow ID namespace");
255 	return 0;
256 }
257 
258 void
flowidns_fini(void)259 flowidns_fini(void)
260 {
261 	flowidns_domain_id_t domain;
262 	struct flowidns_flowid_tree_node *fftn, *fftn_tmp;
263 
264 	VERIFY(__flowidns_inited == 1);
265 
266 	for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
267 	    domain++) {
268 		FLOWIDNS_DOM_LOCK(domain);
269 
270 		RB_FOREACH_SAFE(fftn, flowidns_flowid_tree,
271 		    &(flowidns_domain_array[domain].fd_flowid_tree),
272 		    fftn_tmp) {
273 			RB_REMOVE(flowidns_flowid_tree,
274 			    &(flowidns_domain_array[domain].fd_flowid_tree),
275 			    fftn);
276 			flowidns_fftn_free(fftn);
277 		}
278 
279 		FLOWIDNS_DOM_UNLOCK(domain);
280 
281 		lck_mtx_destroy(&(flowidns_domain_array[domain].fd_mtx),
282 		    &flowidns_lock_group);
283 	}
284 
285 	__flowidns_inited = 0;
286 }
287 
288 static int flowidns_stats_sysctl SYSCTL_HANDLER_ARGS;
289 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flowidns,
290     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
291     0, 0, flowidns_stats_sysctl, "-",
292     "flowid allocations (struct sk_stats_flowidns_header, "
293     "skywalk/os_stats_private.h)");
294 
295 static int
flowidns_dump_domain(struct sysctl_req * req,struct flowidns_domain * domain)296 flowidns_dump_domain(struct sysctl_req *req, struct flowidns_domain *domain)
297 {
298 	struct flowidns_flowid_tree_node *fftn;
299 	struct sk_stats_flowidns_header header;
300 	struct sk_stats_flowidns_record record;
301 	uint64_t n_records;
302 	int err;
303 
304 	/* Fill out header */
305 	memset(&header, 0, sizeof(header));
306 	header.sfh_domain = domain->fd_id;
307 	header.sfh_nallocs = domain->fd_nallocs;
308 	header.sfh_nreleases = domain->fd_nreleases;
309 	header.sfh_ncollisions = domain->fd_ncollisions;
310 	n_records = domain->fd_nallocs - domain->fd_nreleases;
311 	VERIFY(n_records <= UINT32_MAX);
312 	header.sfh_nrecords = (uint32_t)n_records;
313 
314 	err = SYSCTL_OUT(req, &header, sizeof(header));
315 	if (err) {
316 		return err;
317 	}
318 
319 	/* Fill out records */
320 	RB_FOREACH(fftn, flowidns_flowid_tree, &domain->fd_flowid_tree) {
321 		VERIFY(n_records > 0);
322 		n_records--;
323 		bzero(&record, sizeof(record));
324 		record.sfr_flowid = fftn->fftn_flowid;
325 		record.sfr_af = fftn->fftn_flowkey.ffk_af;
326 		record.sfr_ipproto = fftn->fftn_flowkey.ffk_proto;
327 		record.sfr_protoid = fftn->fftn_flowkey.ffk_protoid;
328 		_CASSERT(sizeof(fftn->fftn_flowkey.ffk_laddr) ==
329 		    sizeof(record.sfr_laddr));
330 		_CASSERT(sizeof(fftn->fftn_flowkey.ffk_raddr) ==
331 		    sizeof(record.sfr_raddr));
332 		bcopy(&(fftn->fftn_flowkey.ffk_laddr), &record.sfr_laddr,
333 		    sizeof(record.sfr_laddr));
334 		bcopy(&(fftn->fftn_flowkey.ffk_raddr), &record.sfr_raddr,
335 		    sizeof(record.sfr_raddr));
336 
337 		err = SYSCTL_OUT(req, &record, sizeof(record));
338 		if (err) {
339 			return err;
340 		}
341 	}
342 	VERIFY(n_records == 0);
343 	return 0;
344 }
345 
346 static int
347 flowidns_stats_sysctl SYSCTL_HANDLER_ARGS
348 {
349 #pragma unused(oidp, arg1, arg2)
350 	flowidns_domain_id_t domain;
351 	int err = 0;
352 
353 	if (!kauth_cred_issuser(kauth_cred_get())) {
354 		return EPERM;
355 	}
356 
357 	if (__flowidns_inited == 0) {
358 		return ENOTSUP;
359 	}
360 
361 	net_update_uptime();
362 
363 	for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
364 	    domain++) {
365 		FLOWIDNS_DOM_LOCK(domain);
366 		err = flowidns_dump_domain(req, &flowidns_domain_array[domain]);
367 		FLOWIDNS_DOM_UNLOCK(domain);
368 		if (err != 0) {
369 			return err;
370 		}
371 	}
372 	/*
373 	 * If this is just a request for length, add slop because
374 	 * this is dynamically changing data
375 	 */
376 	if (req->oldptr == USER_ADDR_NULL) {
377 		req->oldidx += 20 * sizeof(struct sk_stats_flowidns_record);
378 	}
379 	return err;
380 }
381