/*
 * xref: /xnu-10002.1.13/bsd/skywalk/nexus/flowswitch/fsw_flow.c
 * (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
 */
/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>

static void fsw_flow_route_ctor(void *, struct flow_route *);
static int fsw_flow_route_resolve(void *, struct flow_route *,
    struct __kern_packet *);
/*
 * fsw_flow_add: add a flow to the flowswitch on behalf of the process
 * identified by req0->nfr_pid.
 *
 * Binds a nexus port (ephemeral, caller-specified, or reused from an
 * existing flow owner for the same pid/context), allocates a flow owner
 * mapping if needed, and registers the flow with the flow manager.
 *
 * On success, returns the flow owner and copies the (locally modified)
 * request back into req0 — including the assigned nx_port and the
 * QOS_MARKING flag.  On failure, returns NULL with *error set, after
 * undoing any owner allocation and port binding performed here.
 *
 * Serialization: runs under the per-pid flow owner bucket (fob) lock with
 * the FOBF_OPEN_BUSY handshake; the fob lock is dropped and re-taken
 * around operations that may block (msleep, read_random via
 * uuid_generate_random, nxdom_bind/unbind_port) to preserve lock ordering
 * with the flowswitch lock.  The detach barrier keeps the flowswitch
 * instance alive for the duration of the call.
 */
struct flow_owner *
fsw_flow_add(struct nx_flowswitch *fsw, struct nx_flow_req *req0, int *error)
{
	struct kern_nexus *nx = fsw->fsw_nx;
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	nexus_port_t nx_port = req0->nfr_nx_port;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo = NULL;
	void *fo_context = req0->nfr_context;
	boolean_t nx_bound = FALSE;     /* TRUE once nxdom_bind_port succeeds */
	boolean_t new_mapping = FALSE;  /* TRUE once a new flow owner is allocated */
	struct nx_flow_req req;
	uuid_t uuid_key;
	bool nx_port_pid_bound;
	uint32_t max_flowadv = nx->nx_prov->nxprov_params->nxp_flowadv_max;
	struct proc *p;
	int pid = req0->nfr_pid;
	bool low_latency = ((req0->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
#if SK_LOG
	uuid_string_t uuidstr;
#endif /* SK_LOG */

	*error = 0;

	/*
	 * Make a local copy of the original request; we'll modify the
	 * local copy and write it back to the original upon success.
	 */
	bcopy(req0, &req, sizeof(*req0));
	ASSERT(!uuid_is_null(req.nfr_flow_uuid));

	/*
	 * Interface attach and detach involve holding the flowswitch lock
	 * held as writer.  Given that we might block in msleep() below,
	 * holding the flowswitch RW lock is not an option.  Instead, we
	 * utilize the detach barrier prevent things from going away while
	 * we are here.
	 */
	if (!fsw_detach_barrier_add(fsw)) {
		SK_ERR("netagent detached");
		*error = ENXIO;
		return NULL;
	}

	/*
	 * We insist that PID resolves to a process for flow add, but not for
	 * delete. That's because those events may be posted (to us) after the
	 * corresponding process has exited, and so we still need to be able to
	 * cleanup.
	 */
	p = proc_find(pid);
	if (p == PROC_NULL) {
		SK_ERR("process for pid %d doesn't exist", pid);
		*error = EINVAL;
		fsw_detach_barrier_remove(fsw);
		return NULL;
	}
	req.nfr_proc = p;

	/*
	 * If interface is currently attached, indicate that a bind is in
	 * progress, so that upon releasing the lock any threads attempting
	 * to detach the interface will wait until we're done.
	 */
	fob = flow_mgr_get_fob_by_pid(fm, pid);
	FOB_LOCK_SPIN(fob);
	/* wait for any in-flight open/close on this bucket to finish */
	while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
		if (++(fob->fob_open_waiters) == 0) {   /* wraparound */
			fob->fob_open_waiters++;
		}
		/* sleep above PZERO is signal-interruptible; bail on EINTR */
		if ((*error = msleep(&fob->fob_open_waiters, &fob->fob_lock,
		    (PZERO + 1) | PSPIN, __FUNCTION__, NULL)) == EINTR) {
			SK_ERR("%s(%d) binding for uuid %s was interrupted",
			    sk_proc_name_address(p), pid,
			    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
			ASSERT(fob->fob_open_waiters > 0);
			fob->fob_open_waiters--;
			FOB_UNLOCK(fob);
			ASSERT(fo == NULL);
			/* OPEN_BUSY was never set; skip the done: cleanup */
			goto unbusy;
		}
	}
	if (__improbable((fob->fob_busy_flags & FOBF_DEAD) != 0)) {
		SK_ERR("%s(%d) binding for flow_uuid %s aborted due to "
		    "dead owner", sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
		*error = ENXIO;
		goto done;
	}
	ASSERT(!(fob->fob_busy_flags & FOBF_OPEN_BUSY));
	fob->fob_busy_flags |= FOBF_OPEN_BUSY;

	/*
	 * Single-pass block: resolve/allocate the nexus port and flow
	 * owner, breaking out with *error set on failure.
	 */
	do {
		fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
		if (fo == NULL && nx_port == NEXUS_PORT_ANY) {
			/* no existing owner, no port requested: bind an
			 * ephemeral nexus port keyed to this pid */
			struct nxbind nxb;

			/*
			 * Release lock to maintain ordering with the
			 * flowswitch lock; busy flag is set above.
			 * Also read_random() may block.
			 */
			FOB_UNLOCK(fob);

			uuid_generate_random(uuid_key);

			bzero(&nxb, sizeof(nxb));
			nxb.nxb_flags |= NXBF_MATCH_UNIQUEID;
			nxb.nxb_uniqueid = proc_uniqueid(p);
			nxb.nxb_pid = pid;
			nxb.nxb_flags |= NXBF_MATCH_KEY;
			nxb.nxb_key_len = sizeof(uuid_key);
			nxb.nxb_key = sk_alloc_data(nxb.nxb_key_len,
			    Z_WAITOK | Z_NOFAIL, skmem_tag_nx_key);
			bcopy(uuid_key, nxb.nxb_key, nxb.nxb_key_len);

			/*
			 * Bind a new nexus port.  Directly invoke the
			 * nxdom_bind_port() callback of the nexus since
			 * the nexus instance is already known.  Free
			 * the UUID key upon failure; otherwise callee
			 * will attach it to the nexus port and clean
			 * it up during nxdom_unbind_port().
			 */
			if ((*error = NX_DOM(nx)->nxdom_bind_port(nx,
			    &nx_port, &nxb, NULL)) != 0) {
				sk_free_data(nxb.nxb_key, nxb.nxb_key_len);
				SK_ERR("%s(%d) failed to bind flow_uuid %s to a "
				    "nx_port (err %d)", sk_proc_name_address(p),
				    pid, sk_uuid_unparse(req.nfr_flow_uuid,
				    uuidstr), *error);
				nx_port = NEXUS_PORT_ANY;
				FOB_LOCK_SPIN(fob);
				break;
			}
			ASSERT(nx_port != NEXUS_PORT_ANY);
			nx_bound = TRUE;

			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated with "
			    "ephemeral nx_port %d", sk_proc_name_address(p),
			    pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
			    (int)nx_port);

			FOB_LOCK_SPIN(fob);
			/*
			 * if there's no interface associated with this,
			 * then bail
			 */
			if (__improbable((fob->fob_busy_flags & FOBF_DEAD) !=
			    0 || fsw->fsw_ifp == NULL ||
			    fsw->fsw_agent_session == NULL)) {
				SK_ERR("%s(%d) binding for flow_uuid %s aborted "
				    "(lost race)", sk_proc_name_address(p),
				    pid, sk_uuid_unparse(req.nfr_flow_uuid,
				    uuidstr));
				*error = ENXIO;
				break;
			}
			nx_port_pid_bound = true;
			uuid_copy(req.nfr_bind_key, uuid_key);
		} else if (fo == NULL) {
			/* no existing owner, but caller supplied a port */
			/* make sure request has valid nx_port */
			ASSERT(nx_port != NEXUS_PORT_ANY);
			/*
			 * XXX
			 * Why is this path supported? Normal flows are not
			 * added with a specified port and this check does
			 * nothing to verify if the port is used.
			 *
			 * Using nx_port_is_valid() is wrong because that
			 * assumes the array already has non-zero ports.
			 */
			if (__improbable(nx_port >= NX_PORT_CHUNK)) {
				*error = EINVAL;
				break;
			}
			/* read_random() may block */
			FOB_LOCK_CONVERT(fob);

			nx_port_pid_bound = false;
			uuid_generate_random(uuid_key);

			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated "
			    "with nx_port %d", sk_proc_name_address(p),
			    pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
			    (int)nx_port);
		} else {
			/* subsequent request should reuse existing port */
			ASSERT(fo->fo_nx_port != NEXUS_PORT_ANY);
			if (nx_port != NEXUS_PORT_ANY &&
			    nx_port != fo->fo_nx_port) {
				*error = EINVAL;
				break;
			}
			/* fillout info for nexus port */
			nx_port = fo->fo_nx_port;
			uuid_copy(uuid_key, fo->fo_key);
			break;
		}

		FOB_LOCK_CONVERT(fob);

		/* first flow for this owner: create the pid->port mapping */
		ASSERT(nx_port != NEXUS_PORT_ANY);
		ASSERT(fo == NULL);
		fo = flow_owner_alloc(fob, p, nx_port, nx_port_pid_bound,
		    (max_flowadv != 0), fsw, NULL, fo_context, low_latency);
		if (fo == NULL) {
			*error = ENOMEM;
			break;
		}
		ASSERT(!uuid_is_null(uuid_key));
		uuid_copy(fo->fo_key, uuid_key);
		new_mapping = TRUE;
	} while (0);

	if (*error != 0) {
		goto done;
	}

	/* make sure rule ID isn't already being used */
	struct flow_entry *fe;
	if ((fe = flow_entry_find_by_uuid(fo, req.nfr_flow_uuid)) != NULL) {
#if SK_LOG
		char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
		SK_DSC(p, "flow uuid collision: \"%s\" already exists at "
		    "fe 0x%llx flags 0x%b %s(%d)",
		    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
		    fe->fe_flags, FLOWENTF_BITS, fe->fe_proc_name, fe->fe_pid);
#endif /* SK_LOG */
		*error = EEXIST;
		flow_entry_release(&fe);
		goto done;
	}

	/* return assigned nexus port to caller */
	req.nfr_nx_port = nx_port;
	if (__probable(!fsw_qos_default_restricted())) {
		req.nfr_flags |= NXFLOWREQF_QOS_MARKING;
	} else {
		req.nfr_flags &= ~NXFLOWREQF_QOS_MARKING;
	}

	FOB_LOCK_CONVERT(fob);

	/* register the flow entry itself with the flow manager */
	*error = flow_mgr_flow_add(nx, fm, fo, fsw->fsw_ifp, &req,
	    fsw_flow_route_ctor, fsw_flow_route_resolve, fsw);

	if (*error == 0) {
		/* replace original request with our (modified) local copy */
		bcopy(&req, req0, sizeof(*req0));

		SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s is now on "
		    "nx_port %d", sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
		    (int)nx_port);
	}

done:
	/* on failure, unwind whatever this call created (owner, port bind) */
	if (__improbable(*error != 0)) {
		SK_ERR("%s(%d) failed to add flow_uuid %s (err %d)",
		    sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr), *error);
		if (fo != NULL) {
			if (new_mapping) {
				FOB_LOCK_CONVERT(fob);
				flow_owner_free(fob, fo);
			}
			fo = NULL;
		}
		if (nx_bound) {
			ASSERT(nx_port != NEXUS_PORT_ANY);
			FOB_LOCK_ASSERT_HELD(fob);
			/*
			 * Release lock to maintain ordering with the
			 * flowswitch lock; busy flag is set above.
			 */
			FOB_UNLOCK(fob);
			(void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
			nx_port = NEXUS_PORT_ANY;
			FOB_LOCK_SPIN(fob);
		}
	}
	/* drop OPEN_BUSY and wake any open/close waiters on this bucket */
	fob->fob_busy_flags &= ~FOBF_OPEN_BUSY;
	if (__improbable(fob->fob_open_waiters > 0)) {
		fob->fob_open_waiters = 0;
		wakeup(&fob->fob_open_waiters);
	}
	if (__improbable(fob->fob_close_waiters > 0)) {
		fob->fob_close_waiters = 0;
		wakeup(&fob->fob_close_waiters);
	}
	FOB_UNLOCK(fob);

unbusy:
	/* also reached directly from the interrupted-msleep path above */
	proc_rele(p);
	p = PROC_NULL;
	/* allow any pending detach to proceed */
	fsw_detach_barrier_remove(fsw);

	return fo;
}
338 
/*
 * fsw_flow_del: remove the flow identified by req->nfr_flow_uuid for the
 * process req->nfr_pid.
 *
 * Destroys the flow entry (nolinger/params are forwarded to
 * flow_owner_destroy_entry), and — if the owner has no remaining flows,
 * its nexus port was already destroyed, and the port was pid-bound by
 * fsw_flow_add — also unbinds the nexus port and frees the owner.
 *
 * Unlike fsw_flow_add, the pid need not resolve to a live process, since
 * deletes may arrive after the owning process has exited.
 *
 * Returns 0 on success (ENOENT from entry destruction is treated as
 * non-fatal), or ENOENT/ENXIO on failure.  Serialized via the per-pid
 * bucket's FOBF_CLOSE_BUSY handshake; the fob lock is dropped around
 * nxdom_unbind_port to preserve ordering with the flowswitch lock.
 */
int
fsw_flow_del(struct nx_flowswitch *fsw, struct nx_flow_req *req, bool nolinger,
    void *params)
{
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	struct kern_nexus *nx = fsw->fsw_nx;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo;
	void *fo_context = req->nfr_context;
	pid_t pid = req->nfr_pid;
	bool low_latency = ((req->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
	int error;

	ASSERT(!uuid_is_null(req->nfr_flow_uuid));

	/*
	 * we use the detach barrier to prevent flowswith instance from
	 * going away while we are here.
	 */
	if (!fsw_detach_barrier_add(fsw)) {
		SK_ERR("netagent detached");
		return ENXIO;
	}

	/* find mapping */
	fob = flow_mgr_get_fob_by_pid(fm, pid);
	FOB_LOCK_SPIN(fob);
	while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
		if (++(fob->fob_close_waiters) == 0) {  /* wraparound */
			fob->fob_close_waiters++;
		}
		/*
		 * Sleep priority below PZERO: presumably not interruptible
		 * by signals (unlike the open path), hence the return value
		 * is deliberately ignored — NOTE(review): confirm.
		 */
		(void) msleep(&fob->fob_close_waiters, &fob->fob_lock,
		    (PZERO - 1) | PSPIN, __FUNCTION__, NULL);
	}
	fob->fob_busy_flags |= FOBF_CLOSE_BUSY;

	fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
	if (fo == NULL) {
		error = ENOENT;
		goto done;
	}

	FOB_LOCK_CONVERT(fob);

	/*
	 * Unbind flow.  Note that if "auto close" is enabled, the flows
	 * associated with this fo would have been removed when the channel
	 * opened to the nexus port gets closed.  If we get ENOENT just
	 * treat as as non-fatal and proceed further down.
	 */
	error = flow_owner_destroy_entry(fo, req->nfr_flow_uuid, nolinger,
	    params);
	if (error != 0 && error != ENOENT) {
		goto done;
	}

	/*
	 * If the channel that was connected to the nexus port is no longer
	 * around, i.e. fsw_port_dtor() has been called, and there are no
	 * more flows on the owner, and the owner was bound to PID on the
	 * nexus port in fsw_flow_bind(), remove the nexus binding now to make
	 * this port available.
	 */
	if (RB_EMPTY(&fo->fo_flow_entry_id_head) &&
	    fo->fo_nx_port_destroyed && fo->fo_nx_port_pid_bound) {
		nexus_port_t nx_port = fo->fo_nx_port;
		ASSERT(nx_port != NEXUS_PORT_ANY);
		/*
		 * Release lock to maintain ordering with the
		 * flowswitch lock; busy flag is set above.
		 */
		FOB_UNLOCK(fob);
		(void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
		FOB_LOCK(fob);
		flow_owner_free(fob, fo);
		fo = NULL;
	}
	/* ENOENT from entry destruction is absorbed here */
	error = 0;

done:
#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_FLOW) != 0)) {
		uuid_string_t uuidstr;
		if (fo != NULL) {
			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s (err %d)",
			    fo->fo_name, fo->fo_pid,
			    sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
		} else {
			SK_DF(SK_VERB_FLOW, "pid %d flow_uuid %s (err %d)", pid,
			    sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
		}
	}
#endif /* SK_LOG */

	/* drop CLOSE_BUSY and wake any open/close waiters on this bucket */
	fob->fob_busy_flags &= ~FOBF_CLOSE_BUSY;
	if (__improbable(fob->fob_open_waiters > 0)) {
		fob->fob_open_waiters = 0;
		wakeup(&fob->fob_open_waiters);
	}
	if (__improbable(fob->fob_close_waiters > 0)) {
		fob->fob_close_waiters = 0;
		wakeup(&fob->fob_close_waiters);
	}
	FOB_UNLOCK(fob);

	/* allow any pending detach to proceed */
	fsw_detach_barrier_remove(fsw);

	return error;
}
449 
/*
 * fsw_flow_config: apply a runtime configuration change to an existing
 * flow.  Currently the only supported knob is NXFLOWREQF_NOWAKEFROMSLEEP,
 * which is mirrored both into the flow entry's flags (atomically) and
 * into the flow's netns port reservation token.
 *
 * The requesting pid must match the flow's owning pid (EPERM otherwise);
 * returns ENOENT if no flow matches req->nfr_flow_uuid.  Runs under the
 * flowswitch read lock.
 */
int
fsw_flow_config(struct nx_flowswitch *fsw, struct nx_flow_req *req)
{
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	struct flow_entry *fe = NULL;
	struct ns_token *nt = NULL;
	int error = 0;

	FSW_RLOCK(fsw);
	/* returns a referenced entry; released in the done: path */
	fe = flow_mgr_get_fe_by_uuid_rlock(fm, req->nfr_flow_uuid);
	if (fe == NULL) {
		SK_ERR("can't find flow");
		error = ENOENT;
		goto done;
	}

	/* only the owning process may reconfigure its flow */
	if (fe->fe_pid != req->nfr_pid) {
		SK_ERR("flow ownership error");
		error = EPERM;
		goto done;
	}

	/* right now only support NXFLOWREQF_NOWAKEFROMSLEEP config */
	nt = fe->fe_port_reservation;
	if (req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
		netns_change_flags(&nt, NETNS_NOWAKEFROMSLEEP, 0);
	} else {
		os_atomic_andnot(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
		netns_change_flags(&nt, 0, NETNS_NOWAKEFROMSLEEP);
	}
#if SK_LOG
	char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
	SK_DF(SK_VERB_FLOW, "%s: NOWAKEFROMSLEEP %d",
	    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
	    req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP ? 1 : 0);
#endif /* SK_LOG */

done:
	if (fe != NULL) {
		flow_entry_release(&fe);
	}
	FSW_RUNLOCK(fsw);
	return error;
}
495 
496 static void
fsw_flow_route_ctor(void * arg,struct flow_route * fr)497 fsw_flow_route_ctor(void *arg, struct flow_route *fr)
498 {
499 	struct nx_flowswitch *fsw = arg;
500 	if (fsw->fsw_ctor != NULL) {
501 		fsw->fsw_ctor(fsw, fr);
502 	}
503 }
504 
505 static int
fsw_flow_route_resolve(void * arg,struct flow_route * fr,struct __kern_packet * pkt)506 fsw_flow_route_resolve(void *arg, struct flow_route *fr,
507     struct __kern_packet *pkt)
508 {
509 	struct nx_flowswitch *fsw = arg;
510 	return (fsw->fsw_resolve != NULL) ? fsw->fsw_resolve(fsw, fr, pkt) : 0;
511 }
512