1 /*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * The flowidns (Flow ID namespace) module provides functionality to allocate
31 * globally unique identifier for a flow.
32 * Currently we have four modules (flowswitch, inpcb, PF & IPSec driver) in our
33 * stack which need to generate flow identifiers. These modules stamp every
34 * outgoing packet with a flowID. This flowID can be used by other upstream
35 * components in the device for flow classification purpose. For example, the
36 * FQ-Codel algorithm relies on this per packet flowID to avoid parsing every
37 * packet header for flow classification. A globally unique flowID can also be
38 * used by the networking feature offload engines operating at link layer to
39 * avoid flow classification operations.
40 * For performance reasons we use the concept of a flow domain and the
41 * data structures used by the flowidns module have per domain instance.
42 * These domains represent the above mentioned four modules generating the
43 * flowID. This allows us to avoid global lock being used while allocating &
44 * releasing flowID. FlowID is a 32-bit unsigned integer and the 2 most
45 * significant bits of flowID are used to encode the domain ID. This
46 * encoding also means that the flowID generator only needs to ensure
47 * uniqueness of identifier within a domain.
48 */
49
50 #include <skywalk/os_skywalk.h>
51 #include <skywalk/os_skywalk_private.h>
52 #include <skywalk/namespace/flowidns.h>
53 #include <dev/random/randomdev.h>
54 #include <sys/sdt.h>
55
56 /* maximum number of flowID generation retries in case of collision */
57 #define FLOWIDNS_MAX_FLOWID_GEN_RETRY 5
58
59 /* 2 most significant bits of the flowID are used to encode the flow domain */
60 #define FLOWIDNS_FLOWID_DOMAIN_SHIFT 30
61 #define FLOWIDNS_FLOWID_DOMAIN_MASK (0x03 << FLOWIDNS_FLOWID_DOMAIN_SHIFT)
62
63 #define FLOWIDNS_FLOWID_SET_DOMAIN(_dom, _fid) do { \
64 (_fid) &= ~FLOWIDNS_FLOWID_DOMAIN_MASK; \
65 (_fid) |= ((_dom) << FLOWIDNS_FLOWID_DOMAIN_SHIFT); \
66 } while (0)
67
68 #define FLOWIDNS_FLOWID_GET_DOMAIN(_dom, _fid) do { \
69 (_dom) = (_fid) >> FLOWIDNS_FLOWID_DOMAIN_SHIFT; \
70 } while (0)
71
72 #define FLOWIDNS_DOM_LOCK(_dom) \
73 lck_mtx_lock(&(flowidns_domain_array[(_dom)].fd_mtx))
74 #define FLOWIDNS_DOM_UNLOCK(_dom) \
75 lck_mtx_unlock(&(flowidns_domain_array[(_dom)].fd_mtx))
76
77 struct flowidns_flowid_tree_node {
78 RB_ENTRY(flowidns_flowid_tree_node) fftn_link;
79 struct flowidns_flow_key fftn_flowkey;
80 flowidns_flowid_t fftn_flowid;
81 };
82
83 static LCK_GRP_DECLARE(flowidns_lock_group, "flowidns_lock");
84 static int __flowidns_inited = 0;
85
86 static SKMEM_TYPE_DEFINE(flowidns_fftn_zone, struct flowidns_flowid_tree_node);
87
88 __attribute__((always_inline))
89 static inline int
fftn_cmp(const struct flowidns_flowid_tree_node * fftn1,const struct flowidns_flowid_tree_node * fftn2)90 fftn_cmp(const struct flowidns_flowid_tree_node *fftn1,
91 const struct flowidns_flowid_tree_node *fftn2)
92 {
93 return (signed)(fftn1->fftn_flowid - fftn2->fftn_flowid);
94 }
95
96 RB_HEAD(flowidns_flowid_tree, flowidns_flowid_tree_node);
97 RB_PROTOTYPE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link,
98 fftn_cmp);
99 RB_GENERATE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link,
100 fftn_cmp);
101
102 struct flowidns_domain {
103 decl_lck_mtx_data(, fd_mtx);
104 struct flowidns_flowid_tree fd_flowid_tree;
105 uint32_t fd_id;
106 uint64_t fd_nallocs;
107 uint64_t fd_nreleases;
108 uint64_t fd_ncollisions;
109 };
110
111 static struct flowidns_domain flowidns_domain_array[FLOWIDNS_DOMAIN_MAX + 1];
112
113 static struct flowidns_flowid_tree_node *
flowidns_fftn_alloc(bool can_block)114 flowidns_fftn_alloc(bool can_block)
115 {
116 struct flowidns_flowid_tree_node *fftn = NULL;
117 zalloc_flags_t zflags;
118
119 zflags = can_block ? Z_WAITOK_ZERO : Z_NOWAIT_ZERO;
120 fftn = zalloc_flags(flowidns_fftn_zone, zflags);
121 return fftn;
122 }
123
124 static void
flowidns_fftn_free(struct flowidns_flowid_tree_node * fftn)125 flowidns_fftn_free(struct flowidns_flowid_tree_node *fftn)
126 {
127 zfree(flowidns_fftn_zone, fftn);
128 }
129
130 static struct flowidns_flowid_tree_node *
flowidns_find_fftn(flowidns_flowid_t flowid,flowidns_domain_id_t domain)131 flowidns_find_fftn(flowidns_flowid_t flowid, flowidns_domain_id_t domain)
132 {
133 struct flowidns_flowid_tree_node find = { .fftn_flowid = flowid };
134
135 return RB_FIND(flowidns_flowid_tree,
136 &(flowidns_domain_array[domain].fd_flowid_tree), &find);
137 }
138
139 void
flowidns_allocate_flowid(flowidns_domain_id_t domain,struct flowidns_flow_key * pflow_key,flowidns_flowid_t * pflowid)140 flowidns_allocate_flowid(flowidns_domain_id_t domain,
141 struct flowidns_flow_key *pflow_key, flowidns_flowid_t *pflowid)
142 {
143 struct flowidns_flowid_tree_node *fftn = NULL, *dup = NULL;
144 uint32_t flowid = 0;
145 int retry_cnt = 0;
146
147 VERIFY(__flowidns_inited == 1);
148 VERIFY(pflowid != NULL);
149 VERIFY(pflow_key != NULL);
150 VERIFY(domain >= FLOWIDNS_DOMAIN_MIN &&
151 domain <= FLOWIDNS_DOMAIN_MAX);
152
153 FLOWIDNS_DOM_LOCK(domain);
154
155 fftn = flowidns_fftn_alloc(true);
156 if (__improbable(fftn == NULL)) {
157 panic_plain("failed to allocate flowid node\n");
158 }
159 retry:
160 /* try to get a non-zero flow identifier */
161 do {
162 read_frandom(&flowid, sizeof(flowid));
163 } while (__improbable(flowid == 0));
164
165 FLOWIDNS_FLOWID_SET_DOMAIN(domain, flowid);
166
167 fftn->fftn_flowid = flowid;
168 fftn->fftn_flowkey = *pflow_key;
169 dup = RB_INSERT(flowidns_flowid_tree,
170 &(flowidns_domain_array[domain].fd_flowid_tree), fftn);
171
172 /* try to get a unique flow identifier */
173 if (dup != NULL) {
174 retry_cnt++;
175 flowidns_domain_array[domain].fd_ncollisions++;
176 SK_ERR("duplicate flowid 0x%x generated, retrying %d",
177 flowid, retry_cnt);
178 /*
179 * safeguard to check if we need a better hash strategy.
180 */
181 VERIFY(retry_cnt <= FLOWIDNS_MAX_FLOWID_GEN_RETRY);
182 goto retry;
183 }
184 *pflowid = flowid;
185 flowidns_domain_array[domain].fd_nallocs++;
186 VERIFY(flowidns_domain_array[domain].fd_nallocs != 0);
187
188 FLOWIDNS_DOM_UNLOCK(domain);
189
190 DTRACE_SKYWALK2(fidalloc, uint32_t, domain, uint32_t, flowid);
191 }
192
193 void
flowidns_release_flowid(flowidns_flowid_t flowid)194 flowidns_release_flowid(flowidns_flowid_t flowid)
195 {
196 struct flowidns_flowid_tree_node *fftn;
197 flowidns_domain_id_t domain;
198
199 VERIFY(__flowidns_inited == 1);
200 VERIFY(flowid != 0);
201
202 FLOWIDNS_FLOWID_GET_DOMAIN(domain, flowid);
203 VERIFY(domain >= FLOWIDNS_DOMAIN_MIN &&
204 domain <= FLOWIDNS_DOMAIN_MAX);
205
206 DTRACE_SKYWALK2(fidrel, uint32_t, domain, uint32_t, flowid);
207
208 FLOWIDNS_DOM_LOCK(domain);
209
210 fftn = flowidns_find_fftn(flowid, domain);
211 if (fftn == NULL) {
212 panic_plain("flowid 0x%x not found in domain %d\n", flowid,
213 domain);
214 }
215 RB_REMOVE(flowidns_flowid_tree,
216 &(flowidns_domain_array[domain].fd_flowid_tree), fftn);
217 ASSERT(fftn->fftn_flowid == flowid);
218 flowidns_fftn_free(fftn);
219 flowidns_domain_array[domain].fd_nreleases++;
220 VERIFY(flowidns_domain_array[domain].fd_nreleases != 0);
221
222 FLOWIDNS_DOM_UNLOCK(domain);
223 }
224
225 int
flowidns_init()226 flowidns_init()
227 {
228 flowidns_domain_id_t domain;
229
230 VERIFY(__flowidns_inited == 0);
231 _CASSERT(SFH_DOMAIN_IPSEC == FLOWIDNS_DOMAIN_IPSEC);
232 _CASSERT(SFH_DOMAIN_FLOWSWITCH == FLOWIDNS_DOMAIN_FLOWSWITCH);
233 _CASSERT(SFH_DOMAIN_INPCB == FLOWIDNS_DOMAIN_INPCB);
234 _CASSERT(SFH_DOMAIN_PF == FLOWIDNS_DOMAIN_PF);
235 _CASSERT(FLOWIDNS_DOMAIN_MIN == 0);
236 /*
237 * FLOWIDNS_FLOWID_DOMAIN_{MASK, SHIFT} macros are based on below
238 * assumption.
239 */
240 _CASSERT(FLOWIDNS_DOMAIN_MAX == 3);
241
242 for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
243 domain++) {
244 bzero(&flowidns_domain_array[domain],
245 sizeof(struct flowidns_domain));
246 flowidns_domain_array[domain].fd_id = domain;
247 lck_mtx_init(&(flowidns_domain_array[domain].fd_mtx),
248 &flowidns_lock_group, NULL);
249 RB_INIT(&(flowidns_domain_array[domain].fd_flowid_tree));
250 }
251
252 __flowidns_inited = 1;
253 SK_D("initialized flow ID namespace");
254 return 0;
255 }
256
257 void
flowidns_fini(void)258 flowidns_fini(void)
259 {
260 flowidns_domain_id_t domain;
261 struct flowidns_flowid_tree_node *fftn, *fftn_tmp;
262
263 VERIFY(__flowidns_inited == 1);
264
265 for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
266 domain++) {
267 FLOWIDNS_DOM_LOCK(domain);
268
269 RB_FOREACH_SAFE(fftn, flowidns_flowid_tree,
270 &(flowidns_domain_array[domain].fd_flowid_tree),
271 fftn_tmp) {
272 RB_REMOVE(flowidns_flowid_tree,
273 &(flowidns_domain_array[domain].fd_flowid_tree),
274 fftn);
275 flowidns_fftn_free(fftn);
276 }
277
278 FLOWIDNS_DOM_UNLOCK(domain);
279
280 lck_mtx_destroy(&(flowidns_domain_array[domain].fd_mtx),
281 &flowidns_lock_group);
282 }
283
284 __flowidns_inited = 0;
285 }
286
287 static int flowidns_stats_sysctl SYSCTL_HANDLER_ARGS;
288 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flowidns,
289 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
290 0, 0, flowidns_stats_sysctl, "-",
291 "flowid allocations (struct sk_stats_flowidns_header, "
292 "skywalk/os_stats_private.h)");
293
294 static int
flowidns_dump_domain(struct sysctl_req * req,struct flowidns_domain * domain)295 flowidns_dump_domain(struct sysctl_req *req, struct flowidns_domain *domain)
296 {
297 struct flowidns_flowid_tree_node *fftn;
298 struct sk_stats_flowidns_header header;
299 struct sk_stats_flowidns_record record;
300 uint64_t n_records;
301 int err;
302
303 /* Fill out header */
304 memset(&header, 0, sizeof(header));
305 header.sfh_domain = domain->fd_id;
306 header.sfh_nallocs = domain->fd_nallocs;
307 header.sfh_nreleases = domain->fd_nreleases;
308 header.sfh_ncollisions = domain->fd_ncollisions;
309 n_records = domain->fd_nallocs - domain->fd_nreleases;
310 VERIFY(n_records <= UINT32_MAX);
311 header.sfh_nrecords = (uint32_t)n_records;
312
313 err = SYSCTL_OUT(req, &header, sizeof(header));
314 if (err) {
315 return err;
316 }
317
318 /* Fill out records */
319 RB_FOREACH(fftn, flowidns_flowid_tree, &domain->fd_flowid_tree) {
320 VERIFY(n_records > 0);
321 n_records--;
322 bzero(&record, sizeof(record));
323 record.sfr_flowid = fftn->fftn_flowid;
324 record.sfr_af = fftn->fftn_flowkey.ffk_af;
325 record.sfr_ipproto = fftn->fftn_flowkey.ffk_proto;
326 record.sfr_protoid = fftn->fftn_flowkey.ffk_protoid;
327 _CASSERT(sizeof(fftn->fftn_flowkey.ffk_laddr) ==
328 sizeof(record.sfr_laddr));
329 _CASSERT(sizeof(fftn->fftn_flowkey.ffk_raddr) ==
330 sizeof(record.sfr_raddr));
331 bcopy(&(fftn->fftn_flowkey.ffk_laddr), &record.sfr_laddr,
332 sizeof(record.sfr_laddr));
333 bcopy(&(fftn->fftn_flowkey.ffk_raddr), &record.sfr_raddr,
334 sizeof(record.sfr_raddr));
335
336 err = SYSCTL_OUT(req, &record, sizeof(record));
337 if (err) {
338 return err;
339 }
340 }
341 VERIFY(n_records == 0);
342 return 0;
343 }
344
345 static int
346 flowidns_stats_sysctl SYSCTL_HANDLER_ARGS
347 {
348 #pragma unused(oidp, arg1, arg2)
349 flowidns_domain_id_t domain;
350 int err = 0;
351
352 if (!kauth_cred_issuser(kauth_cred_get())) {
353 return EPERM;
354 }
355
356 if (__flowidns_inited == 0) {
357 return ENOTSUP;
358 }
359
360 net_update_uptime();
361
362 for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX;
363 domain++) {
364 FLOWIDNS_DOM_LOCK(domain);
365 err = flowidns_dump_domain(req, &flowidns_domain_array[domain]);
366 FLOWIDNS_DOM_UNLOCK(domain);
367 if (err != 0) {
368 return err;
369 }
370 }
371 /*
372 * If this is just a request for length, add slop because
373 * this is dynamically changing data
374 */
375 if (req->oldptr == USER_ADDR_NULL) {
376 req->oldidx += 20 * sizeof(struct sk_stats_flowidns_record);
377 }
378 return err;
379 }
380