xref: /xnu-8019.80.24/bsd/skywalk/nexus/upipe/nx_user_pipe.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  *   1. Redistributions of source code must retain the above copyright
36  *      notice, this list of conditions and the following disclaimer.
37  *   2. Redistributions in binary form must reproduce the above copyright
38  *      notice, this list of conditions and the following disclaimer in the
39  *      documentation and/or other materials provided with the distribution.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  */
53 
54 #include <skywalk/os_skywalk_private.h>
55 #include <skywalk/nexus/upipe/nx_user_pipe.h>
56 
57 #define NX_UPIPE_RINGSIZE       128 /* default ring size */
58 #define NX_UPIPE_MAXRINGS       NX_MAX_NUM_RING_PAIR
59 #define NX_UPIPE_MINSLOTS       2       /* XXX same as above */
60 #define NX_UPIPE_MAXSLOTS       4096    /* XXX same as above */
61 #define NX_UPIPE_BUFSIZE        (2 * 1024)
62 #define NX_UPIPE_MINBUFSIZE     1024
63 #define NX_UPIPE_MAXBUFSIZE     (16 * 1024)
64 #define NX_UPIPE_MHINTS         NEXUS_MHINTS_NORMAL
65 
66 static int nx_upipe_na_alloc(struct nexus_adapter *, uint32_t);
67 static struct nexus_upipe_adapter *nx_upipe_find(struct nexus_adapter *,
68     uint32_t);
69 static int nx_upipe_na_add(struct nexus_adapter *,
70     struct nexus_upipe_adapter *);
71 static void nx_upipe_na_remove(struct nexus_adapter *,
72     struct nexus_upipe_adapter *);
73 static int nx_upipe_na_txsync(struct __kern_channel_ring *,
74     struct proc *, uint32_t);
75 static int nx_upipe_na_txsync_locked(struct __kern_channel_ring *,
76     struct proc *, uint32_t, int *, boolean_t);
77 static int nx_upipe_na_rxsync(struct __kern_channel_ring *,
78     struct proc *, uint32_t);
79 static int nx_upipe_na_krings_create(struct nexus_adapter *,
80     struct kern_channel *);
81 static int nx_upipe_na_activate(struct nexus_adapter *, na_activate_mode_t);
82 static void nx_upipe_na_krings_delete(struct nexus_adapter *,
83     struct kern_channel *, boolean_t);
84 static void nx_upipe_na_dtor(struct nexus_adapter *);
85 
86 static void nx_upipe_dom_init(struct nxdom *);
87 static void nx_upipe_dom_terminate(struct nxdom *);
88 static void nx_upipe_dom_fini(struct nxdom *);
89 static int nx_upipe_dom_bind_port(struct kern_nexus *, nexus_port_t *,
90     struct nxbind *, void *);
91 static int nx_upipe_dom_unbind_port(struct kern_nexus *, nexus_port_t);
92 static int nx_upipe_dom_connect(struct kern_nexus_domain_provider *,
93     struct kern_nexus *, struct kern_channel *, struct chreq *,
94     struct kern_channel *, struct nxbind *, struct proc *);
95 static void nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *,
96     struct kern_nexus *, struct kern_channel *);
97 static void nx_upipe_dom_defunct(struct kern_nexus_domain_provider *,
98     struct kern_nexus *, struct kern_channel *, struct proc *);
99 static void nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *,
100     struct kern_nexus *, struct kern_channel *, boolean_t);
101 
102 static int nx_upipe_prov_init(struct kern_nexus_domain_provider *);
103 static int nx_upipe_prov_params_adjust(
104 	const struct kern_nexus_domain_provider *, const struct nxprov_params *,
105 	struct nxprov_adjusted_params *);
106 static int nx_upipe_prov_params(struct kern_nexus_domain_provider *,
107     const uint32_t, const struct nxprov_params *, struct nxprov_params *,
108     struct skmem_region_params[SKMEM_REGIONS]);
109 static int nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *,
110     struct kern_nexus *, struct nexus_adapter *);
111 static void nx_upipe_prov_fini(struct kern_nexus_domain_provider *);
112 static int nx_upipe_prov_nx_ctor(struct kern_nexus *);
113 static void nx_upipe_prov_nx_dtor(struct kern_nexus *);
114 
115 static struct nexus_upipe_adapter *na_upipe_alloc(zalloc_flags_t);
116 static void na_upipe_free(struct nexus_adapter *);
117 
118 static struct nx_upipe *nx_upipe_alloc(zalloc_flags_t);
119 static void nx_upipe_free(struct nx_upipe *);
120 
121 #if (DEVELOPMENT || DEBUG)
122 static uint32_t nx_upipe_mhints = 0;
123 SYSCTL_NODE(_kern_skywalk, OID_AUTO, upipe, CTLFLAG_RW | CTLFLAG_LOCKED,
124     0, "Skywalk upipe tuning");
125 SYSCTL_UINT(_kern_skywalk_upipe, OID_AUTO, nx_mhints,
126     CTLFLAG_RW | CTLFLAG_LOCKED, &nx_upipe_mhints, 0,
127     "upipe nexus memory usage hints");
128 #endif /* (DEVELOPMENT || DEBUG) */
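
/*
 * Usage note (illustrative, not part of the original source): on a
 * DEVELOPMENT/DEBUG kernel the override above is reachable from user
 * space as kern.skywalk.upipe.nx_mhints, e.g.:
 *
 *	sysctl kern.skywalk.upipe.nx_mhints=1
 *
 * Any nonzero value replaces the provider's nxp_mhints in
 * nx_upipe_prov_params() below; the NEXUS_MHINTS_* bit values are
 * defined elsewhere, so "1" here is only a placeholder.
 */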
129 
130 struct nxdom nx_upipe_dom_s = {
131 	.nxdom_prov_head =
132     STAILQ_HEAD_INITIALIZER(nx_upipe_dom_s.nxdom_prov_head),
133 	.nxdom_type =           NEXUS_TYPE_USER_PIPE,
134 	.nxdom_md_type =        NEXUS_META_TYPE_QUANTUM,
135 	.nxdom_md_subtype =     NEXUS_META_SUBTYPE_PAYLOAD,
136 	.nxdom_name =           "upipe",
137 	.nxdom_ports =          {
138 		.nb_def = 2,
139 		.nb_min = 2,
140 		.nb_max = 2,
141 	},
142 	.nxdom_tx_rings = {
143 		.nb_def = 1,
144 		.nb_min = 1,
145 		.nb_max = NX_UPIPE_MAXRINGS,
146 	},
147 	.nxdom_rx_rings = {
148 		.nb_def = 1,
149 		.nb_min = 1,
150 		.nb_max = NX_UPIPE_MAXRINGS,
151 	},
152 	.nxdom_tx_slots = {
153 		.nb_def = NX_UPIPE_RINGSIZE,
154 		.nb_min = NX_UPIPE_MINSLOTS,
155 		.nb_max = NX_UPIPE_MAXSLOTS,
156 	},
157 	.nxdom_rx_slots = {
158 		.nb_def = NX_UPIPE_RINGSIZE,
159 		.nb_min = NX_UPIPE_MINSLOTS,
160 		.nb_max = NX_UPIPE_MAXSLOTS,
161 	},
162 	.nxdom_buf_size = {
163 		.nb_def = NX_UPIPE_BUFSIZE,
164 		.nb_min = NX_UPIPE_MINBUFSIZE,
165 		.nb_max = NX_UPIPE_MAXBUFSIZE,
166 	},
167 	.nxdom_meta_size = {
168 		.nb_def = NX_METADATA_OBJ_MIN_SZ,
169 		.nb_min = NX_METADATA_OBJ_MIN_SZ,
170 		.nb_max = NX_METADATA_USR_MAX_SZ,
171 	},
172 	.nxdom_stats_size = {
173 		.nb_def = 0,
174 		.nb_min = 0,
175 		.nb_max = NX_STATS_MAX_SZ,
176 	},
177 	.nxdom_pipes = {
178 		.nb_def = 0,
179 		.nb_min = 0,
180 		.nb_max = NX_UPIPE_MAXPIPES,
181 	},
182 	.nxdom_mhints = {
183 		.nb_def = NX_UPIPE_MHINTS,
184 		.nb_min = NEXUS_MHINTS_NORMAL,
185 		.nb_max = (NEXUS_MHINTS_NORMAL | NEXUS_MHINTS_WILLNEED |
186     NEXUS_MHINTS_LOWLATENCY | NEXUS_MHINTS_HIUSE),
187 	},
188 	.nxdom_flowadv_max = {
189 		.nb_def = 0,
190 		.nb_min = 0,
191 		.nb_max = NX_FLOWADV_MAX,
192 	},
193 	.nxdom_nexusadv_size = {
194 		.nb_def = 0,
195 		.nb_min = 0,
196 		.nb_max = NX_NEXUSADV_MAX_SZ,
197 	},
198 	.nxdom_capabilities = {
199 		.nb_def = NXPCAP_USER_CHANNEL,
200 		.nb_min = NXPCAP_USER_CHANNEL,
201 		.nb_max = NXPCAP_USER_CHANNEL,
202 	},
203 	.nxdom_qmap = {
204 		.nb_def = NEXUS_QMAP_TYPE_INVALID,
205 		.nb_min = NEXUS_QMAP_TYPE_INVALID,
206 		.nb_max = NEXUS_QMAP_TYPE_INVALID,
207 	},
208 	.nxdom_max_frags = {
209 		.nb_def = NX_PBUF_FRAGS_DEFAULT,
210 		.nb_min = NX_PBUF_FRAGS_MIN,
211 		.nb_max = NX_PBUF_FRAGS_DEFAULT,
212 	},
213 	.nxdom_init =           nx_upipe_dom_init,
214 	.nxdom_terminate =      nx_upipe_dom_terminate,
215 	.nxdom_fini =           nx_upipe_dom_fini,
216 	.nxdom_find_port =      NULL,
217 	.nxdom_port_is_reserved = NULL,
218 	.nxdom_bind_port =      nx_upipe_dom_bind_port,
219 	.nxdom_unbind_port =    nx_upipe_dom_unbind_port,
220 	.nxdom_connect =        nx_upipe_dom_connect,
221 	.nxdom_disconnect =     nx_upipe_dom_disconnect,
222 	.nxdom_defunct =        nx_upipe_dom_defunct,
223 	.nxdom_defunct_finalize = nx_upipe_dom_defunct_finalize,
224 };
225 
226 static struct kern_nexus_domain_provider nx_upipe_prov_s = {
227 	.nxdom_prov_name =              NEXUS_PROVIDER_USER_PIPE,
228 	.nxdom_prov_flags =             NXDOMPROVF_DEFAULT,
229 	.nxdom_prov_cb = {
230 		.dp_cb_init =           nx_upipe_prov_init,
231 		.dp_cb_fini =           nx_upipe_prov_fini,
232 		.dp_cb_params =         nx_upipe_prov_params,
233 		.dp_cb_mem_new =        nx_upipe_prov_mem_new,
234 		.dp_cb_config =         NULL,
235 		.dp_cb_nx_ctor =        nx_upipe_prov_nx_ctor,
236 		.dp_cb_nx_dtor =        nx_upipe_prov_nx_dtor,
237 		.dp_cb_nx_mem_info =    NULL,
238 		.dp_cb_nx_mib_get =     NULL,
239 		.dp_cb_nx_stop =        NULL,
240 	},
241 };
242 
243 static ZONE_DECLARE(na_upipe_zone, SKMEM_ZONE_PREFIX ".na.upipe",
244     sizeof(struct nexus_upipe_adapter), ZC_ZFREE_CLEARMEM);
245 
246 static ZONE_DECLARE(nx_upipe_zone, SKMEM_ZONE_PREFIX ".nx.upipe",
247     sizeof(struct nx_upipe), ZC_ZFREE_CLEARMEM);
248 
249 #define SKMEM_TAG_PIPES "com.apple.skywalk.pipes"
250 static kern_allocation_name_t skmem_tag_pipes;
251 
252 static void
253 nx_upipe_dom_init(struct nxdom *nxdom)
254 {
255 	SK_LOCK_ASSERT_HELD();
256 	ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));
257 
258 	ASSERT(skmem_tag_pipes == NULL);
259 	skmem_tag_pipes = kern_allocation_name_allocate(SKMEM_TAG_PIPES, 0);
260 	ASSERT(skmem_tag_pipes != NULL);
261 
262 	(void) nxdom_prov_add(nxdom, &nx_upipe_prov_s);
263 }
264 
265 static void
266 nx_upipe_dom_terminate(struct nxdom *nxdom)
267 {
268 	struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;
269 
270 	STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
271 	    nxdom_prov_link, tnxdp) {
272 		(void) nxdom_prov_del(nxdom_prov);
273 	}
274 
275 	if (skmem_tag_pipes != NULL) {
276 		kern_allocation_name_release(skmem_tag_pipes);
277 		skmem_tag_pipes = NULL;
278 	}
279 }
280 
281 static void
282 nx_upipe_dom_fini(struct nxdom *nxdom)
283 {
284 #pragma unused(nxdom)
285 }
286 
287 static int
288 nx_upipe_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
289 {
290 #pragma unused(nxdom_prov)
291 	SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
292 	return 0;
293 }
294 
295 static int
296 nx_upipe_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
297     const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
298 {
299 #pragma unused(nxdom_prov, nxp)
300 	/*
301 	 * User pipe requires twice the number of rings.
302 	 * The ring counts must also be symmetrical.
303 	 */
304 	if (*(adj->adj_tx_rings) != *(adj->adj_rx_rings)) {
305 		SK_ERR("rings: tx (%u) != rx (%u)", *(adj->adj_tx_rings),
306 		    *(adj->adj_rx_rings));
307 		return EINVAL;
308 	}
309 
310 	*(adj->adj_tx_rings) *= 2;
311 	*(adj->adj_rx_rings) *= 2;
312 
313 	if (adj->adj_buf_srp->srp_r_seg_size == 0) {
314 		adj->adj_buf_srp->srp_r_seg_size = skmem_usr_buf_seg_size;
315 	}
316 
317 	/* enable magazines layer for metadata */
318 	*(adj->adj_md_magazines) = TRUE;
319 
320 	return 0;
321 }
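
/*
 * Worked example (illustrative): with the domain defaults above
 * (nxdom_tx_rings.nb_def == nxdom_rx_rings.nb_def == 1), this adjust
 * step yields 2 TX and 2 RX rings for the nexus -- presumably one
 * TX/RX pair for each of the two pipe endpoints.  An asymmetric
 * request such as tx_rings = 2, rx_rings = 1 is rejected with EINVAL
 * before any doubling takes place.
 */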
322 
323 static int
324 nx_upipe_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
325     const uint32_t req, const struct nxprov_params *nxp0,
326     struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS])
327 {
328 	struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
329 	int err;
330 
331 	err = nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
332 	    nxdom, nxdom, nxdom, nx_upipe_prov_params_adjust);
333 #if (DEVELOPMENT || DEBUG)
334 	/* sysctl override */
335 	if ((err == 0) && (nx_upipe_mhints != 0)) {
336 		nxp->nxp_mhints = nx_upipe_mhints;
337 	}
338 #endif /* (DEVELOPMENT || DEBUG) */
339 	return err;
340 }
341 
342 static int
343 nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
344     struct kern_nexus *nx, struct nexus_adapter *na)
345 {
346 #pragma unused(nxdom_prov)
347 	int err = 0;
348 
349 	SK_DF(SK_VERB_USER_PIPE,
350 	    "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
351 	    NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
352 	    SK_KVA(na));
353 
354 	ASSERT(na->na_arena == NULL);
355 	ASSERT(NX_USER_CHANNEL_PROV(nx));
356 	/*
357 	 * The underlying nexus adapters already share the same memory
358 	 * allocator, and thus we don't care about storing the pp in
359 	 * the nexus.
360 	 *
361 	 * This means that clients calling kern_nexus_get_pbufpool()
362 	 * will get NULL, but this is fine since we don't expose the
363 	 * user pipe to external kernel clients.
364 	 */
365 	na->na_arena = skmem_arena_create_for_nexus(na,
366 	    NX_PROV(nx)->nxprov_region_params, NULL, NULL, FALSE,
367 	    FALSE, NULL, &err);
368 	ASSERT(na->na_arena != NULL || err != 0);
369 
370 	return err;
371 }
372 
373 static void
374 nx_upipe_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
375 {
376 #pragma unused(nxdom_prov)
377 	SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
378 }
379 
380 static int
381 nx_upipe_prov_nx_ctor(struct kern_nexus *nx)
382 {
383 	SK_LOCK_ASSERT_HELD();
384 	ASSERT(nx->nx_arg == NULL);
385 
386 	SK_D("nexus 0x%llx (%s)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name);
387 
388 	nx->nx_arg = nx_upipe_alloc(Z_WAITOK);
389 	SK_D("create new upipe 0x%llx for nexus 0x%llx",
390 	    SK_KVA(NX_UPIPE_PRIVATE(nx)), SK_KVA(nx));
391 
392 	return 0;
393 }
394 
395 static void
396 nx_upipe_prov_nx_dtor(struct kern_nexus *nx)
397 {
398 	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
399 
400 	SK_LOCK_ASSERT_HELD();
401 
402 	SK_D("nexus 0x%llx (%s) upipe 0x%llx", SK_KVA(nx),
403 	    NX_DOM_PROV(nx)->nxdom_prov_name, SK_KVA(u));
404 
405 	if (u->nup_cli_nxb != NULL) {
406 		nxb_free(u->nup_cli_nxb);
407 		u->nup_cli_nxb = NULL;
408 	}
409 	if (u->nup_srv_nxb != NULL) {
410 		nxb_free(u->nup_srv_nxb);
411 		u->nup_srv_nxb = NULL;
412 	}
413 
414 	SK_DF(SK_VERB_USER_PIPE, "marking upipe 0x%llx as free", SK_KVA(u));
415 	nx_upipe_free(u);
416 	nx->nx_arg = NULL;
417 }
418 
419 static struct nexus_upipe_adapter *
420 na_upipe_alloc(zalloc_flags_t how)
421 {
422 	struct nexus_upipe_adapter *pna;
423 
424 	_CASSERT(offsetof(struct nexus_upipe_adapter, pna_up) == 0);
425 
426 	pna = zalloc_flags(na_upipe_zone, how | Z_ZERO);
427 	if (pna) {
428 		pna->pna_up.na_type = NA_USER_PIPE;
429 		pna->pna_up.na_free = na_upipe_free;
430 	}
431 	return pna;
432 }
433 
434 static void
435 na_upipe_free(struct nexus_adapter *na)
436 {
437 	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
438 
439 	ASSERT(pna->pna_up.na_refcount == 0);
440 	SK_DF(SK_VERB_MEM, "pna 0x%llx FREE", SK_KVA(pna));
441 	bzero(pna, sizeof(*pna));
442 	zfree(na_upipe_zone, pna);
443 }
444 
445 static int
446 nx_upipe_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
447     struct nxbind *nxb0, void *info)
448 {
449 #pragma unused(info)
450 	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
451 	struct nxbind *nxb = NULL;
452 	int error = 0;
453 
454 	ASSERT(nx_port != NULL);
455 	ASSERT(nxb0 != NULL);
456 
457 	switch (*nx_port) {
458 	case NEXUS_PORT_USER_PIPE_CLIENT:
459 	case NEXUS_PORT_USER_PIPE_SERVER:
460 		if ((*nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
461 		    u->nup_cli_nxb != NULL) ||
462 		    (*nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
463 		    u->nup_srv_nxb != NULL)) {
464 			error = EEXIST;
465 			break;
466 		}
467 
468 		nxb = nxb_alloc(Z_WAITOK);
469 		nxb_move(nxb0, nxb);
470 		if (*nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
471 			u->nup_cli_nxb = nxb;
472 		} else {
473 			u->nup_srv_nxb = nxb;
474 		}
475 
476 		ASSERT(error == 0);
477 		break;
478 
479 	default:
480 		error = EDOM;
481 		break;
482 	}
483 
484 	return error;
485 }
486 
487 static int
488 nx_upipe_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
489 {
490 	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
491 	struct nxbind *nxb = NULL;
492 	int error = 0;
493 
494 	ASSERT(nx_port != NEXUS_PORT_ANY);
495 
496 	switch (nx_port) {
497 	case NEXUS_PORT_USER_PIPE_CLIENT:
498 	case NEXUS_PORT_USER_PIPE_SERVER:
499 		if ((nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
500 		    u->nup_cli_nxb == NULL) ||
501 		    (nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
502 		    u->nup_srv_nxb == NULL)) {
503 			error = ENOENT;
504 			break;
505 		}
506 
507 		if (nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
508 			nxb = u->nup_cli_nxb;
509 			u->nup_cli_nxb = NULL;
510 		} else {
511 			nxb = u->nup_srv_nxb;
512 			u->nup_srv_nxb = NULL;
513 		}
514 		nxb_free(nxb);
515 		ASSERT(error == 0);
516 		break;
517 
518 	default:
519 		error = EDOM;
520 		break;
521 	}
522 
523 	return error;
524 }
525 
526 static int
527 nx_upipe_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
528     struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
529     struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
530 {
531 #pragma unused(nxdom_prov)
532 	nexus_port_t port = chr->cr_port;
533 	int err = 0;
534 
535 	SK_LOCK_ASSERT_HELD();
536 
537 	ASSERT(NX_DOM_PROV(nx) == nxdom_prov);
538 	ASSERT(nx->nx_prov->nxprov_params->nxp_type ==
539 	    nxdom_prov->nxdom_prov_dom->nxdom_type &&
540 	    nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_USER_PIPE);
541 
542 	/*
543 	 * XXX: channel in user packet pool mode is not supported for
544 	 * user-pipe for now.
545 	 */
546 	if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
547 		SK_ERR("User packet pool mode not supported for upipe");
548 		err = ENOTSUP;
549 		goto done;
550 	}
551 
552 	if (chr->cr_mode & CHMODE_EVENT_RING) {
553 		SK_ERR("event ring is not supported for upipe");
554 		err = ENOTSUP;
555 		goto done;
556 	}
557 
558 	if (chr->cr_mode & CHMODE_LOW_LATENCY) {
559 		SK_ERR("low latency is not supported for upipe");
560 		err = ENOTSUP;
561 		goto done;
562 	}
563 
564 	if (port == NEXUS_PORT_USER_PIPE_SERVER) {
565 		chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_MASTER;
566 	} else if (port == NEXUS_PORT_USER_PIPE_CLIENT) {
567 		chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_SLAVE;
568 	} else {
569 		err = EINVAL;
570 		goto done;
571 	}
572 
573 	chr->cr_endpoint = chr->cr_real_endpoint;
574 	chr->cr_ring_set = RING_SET_DEFAULT;
575 	chr->cr_pipe_id = 0;
576 	(void) snprintf(chr->cr_name, sizeof(chr->cr_name), "upipe:%llu:%.*s",
577 	    nx->nx_id, (int)nx->nx_prov->nxprov_params->nxp_namelen,
578 	    nx->nx_prov->nxprov_params->nxp_name);
579 
580 	err = na_connect(nx, ch, chr, ch0, nxb, p);
581 done:
582 	return err;
583 }
584 
585 static void
586 nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
587     struct kern_nexus *nx, struct kern_channel *ch)
588 {
589 #pragma unused(nxdom_prov)
590 	SK_LOCK_ASSERT_HELD();
591 
592 	SK_D("channel 0x%llx -!- nexus 0x%llx (%s:\"%s\":%u:%d)", SK_KVA(ch),
593 	    SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
594 	    ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);
595 
596 	na_disconnect(nx, ch);
597 	/*
598 	 * Set NXF_REJECT on the nexus which would cause any channel on the
599 	 * peer adapter to cease to function.
600 	 */
601 	if (NX_PROV(nx)->nxprov_params->nxp_reject_on_close) {
602 		atomic_bitset_32(&nx->nx_flags, NXF_REJECT);
603 	}
604 }
605 
606 static void
607 nx_upipe_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
608     struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
609 {
610 #pragma unused(nxdom_prov, nx)
611 	struct nexus_adapter *na = ch->ch_na;
612 	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
613 	ring_id_t qfirst = ch->ch_first[NR_TX];
614 	ring_id_t qlast = ch->ch_last[NR_TX];
615 	uint32_t i;
616 
617 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
618 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
619 	ASSERT(na->na_type == NA_USER_PIPE);
620 
621 	/*
622 	 * Inform the peer receiver thread in nx_upipe_na_rxsync() or the
623 	 * peer transmit thread in nx_upipe_na_txsync() about
624 	 * this endpoint going defunct.  We utilize the TX ring's
625 	 * lock for serialization, since that is what's being used
626 	 * by the receiving endpoint.
627 	 */
628 	for (i = qfirst; i < qlast; i++) {
629 		/*
630 		 * For maintaining lock ordering between the two channels of
631 		 * user pipe.
632 		 */
633 		if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
634 			(void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
635 			(void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
636 		} else {
637 			(void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
638 			(void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
639 		}
640 	}
641 
642 	na_ch_rings_defunct(ch, p);
643 
644 	for (i = qfirst; i < qlast; i++) {
645 		if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
646 			(void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
647 			(void) kr_exit(&NAKR(na, NR_TX)[i]);
648 		} else {
649 			(void) kr_exit(&NAKR(na, NR_TX)[i]);
650 			(void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
651 		}
652 	}
653 }
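
/*
 * Illustrative sketch (example only, not compiled): the role-dependent
 * kr_enter() order above is the classic two-lock ordering idiom.  Since
 * an RX kring's ckr_pipe points at the peer's TX kring, both branches
 * acquire the master's TX kring before the slave's, so two channels
 * going defunct concurrently cannot deadlock.  Generic form, with
 * hypothetical lock handles:
 */
#if 0
static void
lock_pair_in_global_order(lck_mtx_t *master_side, lck_mtx_t *slave_side)
{
	/*
	 * Regardless of which endpoint is running, the master-side
	 * lock is taken first, establishing a single global order.
	 */
	lck_mtx_lock(master_side);
	lck_mtx_lock(slave_side);
}
#endif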
654 
655 static void
656 nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
657     struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
658 {
659 #pragma unused(nxdom_prov)
660 	struct nexus_upipe_adapter *pna =
661 	    (struct nexus_upipe_adapter *)ch->ch_na;
662 
663 	if (!locked) {
664 		SK_LOCK_ASSERT_NOTHELD();
665 		SK_LOCK();
666 		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
667 	} else {
668 		SK_LOCK_ASSERT_HELD();
669 		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
670 	}
671 
672 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
673 	ASSERT(ch->ch_na->na_type == NA_USER_PIPE);
674 
675 	/*
676 	 * At this point, we know that the arena shared by the master and
677 	 * slave adapters has no more valid mappings on the channels opened
678 	 * to them.  We need to invoke na_defunct() on both adapters to
679 	 * release any remaining slots attached to their rings.
680 	 *
681 	 * Note that the 'ch' that we pass in here is irrelevant as we
682 	 * don't support user packet pool for user pipe.
683 	 */
684 	na_defunct(nx, ch, &pna->pna_up, locked);
685 	if (pna->pna_peer != NULL) {
686 		na_defunct(nx, ch, &pna->pna_peer->pna_up, locked);
687 	}
688 
689 	/*
690 	 * And if their parent adapter (the memory owner) is a pseudo
691 	 * nexus adapter that we initially created in nx_upipe_na_find(),
692 	 * invoke na_defunct() on it now to do the final teardown on
693 	 * the arena.
694 	 */
695 	if (pna->pna_parent->na_type == NA_PSEUDO) {
696 		na_defunct(nx, ch, pna->pna_parent, locked);
697 	}
698 
699 	SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d)",
700 	    ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx),
701 	    nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
702 	    ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);
703 
704 	if (!locked) {
705 		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
706 		SK_UNLOCK();
707 	} else {
708 		LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
709 		SK_LOCK_ASSERT_HELD();
710 	}
711 }
712 
713 /* allocate the pipe array in the parent adapter */
714 static int
715 nx_upipe_na_alloc(struct nexus_adapter *na, uint32_t npipes)
716 {
717 	struct nexus_upipe_adapter **npa;
718 	size_t len, orig_len;
719 
720 	if (npipes <= na->na_max_pipes) {
721 		/* we already have at least as many entries as requested */
722 		return 0;
723 	}
724 	if (npipes < na->na_next_pipe || npipes > NX_UPIPE_MAXPIPES) {
725 		return EINVAL;
726 	}
727 
728 	orig_len = sizeof(struct nexus_upipe_adapter *) * na->na_max_pipes;
729 	len = sizeof(struct nexus_upipe_adapter *) * npipes;
730 	npa = sk_realloc(na->na_pipes, orig_len, len, Z_WAITOK, skmem_tag_pipes);
731 	if (npa == NULL) {
732 		return ENOMEM;
733 	}
734 
735 	na->na_pipes = npa;
736 	na->na_max_pipes = npipes;
737 
738 	return 0;
739 }
740 
741 /* deallocate the pipe array in the parent adapter */
742 void
743 nx_upipe_na_dealloc(struct nexus_adapter *na)
744 {
745 	if (na->na_pipes) {
746 		if (na->na_next_pipe > 0) {
747 			SK_ERR("freeing non-empty pipe array for %s "
748 			    "(%u dangling pipes)!", na->na_name,
749 			    na->na_next_pipe);
750 		}
751 		sk_free(na->na_pipes,
752 		    sizeof(struct nexus_upipe_adapter *) * na->na_max_pipes);
753 		na->na_pipes = NULL;
754 		na->na_max_pipes = 0;
755 		na->na_next_pipe = 0;
756 	}
757 }
758 
759 /* find a pipe endpoint with the given id among the parent's pipes */
760 static struct nexus_upipe_adapter *
761 nx_upipe_find(struct nexus_adapter *parent, uint32_t pipe_id)
762 {
763 	uint32_t i;
764 	struct nexus_upipe_adapter *na;
765 
766 	for (i = 0; i < parent->na_next_pipe; i++) {
767 		na = parent->na_pipes[i];
768 		if (na->pna_id == pipe_id) {
769 			return na;
770 		}
771 	}
772 	return NULL;
773 }
774 
775 /* add a new pipe endpoint to the parent array */
776 static int
777 nx_upipe_na_add(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
778 {
779 	if (parent->na_next_pipe >= parent->na_max_pipes) {
780 		uint32_t npipes = parent->na_max_pipes ?
781 		    2 * parent->na_max_pipes : 2;
782 		int error = nx_upipe_na_alloc(parent, npipes);
783 		if (error) {
784 			return error;
785 		}
786 	}
787 
788 	parent->na_pipes[parent->na_next_pipe] = na;
789 	na->pna_parent_slot = parent->na_next_pipe;
790 	parent->na_next_pipe++;
791 	return 0;
792 }
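
/*
 * Growth example (derived from the code above): the pipe array starts
 * empty and doubles on demand -- 2, 4, 8, ... slots -- so repeated
 * nx_upipe_na_add() calls cost amortized O(1) per pipe, bounded by
 * NX_UPIPE_MAXPIPES in nx_upipe_na_alloc().
 */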
793 
794 /* remove the given pipe endpoint from the parent array */
795 static void
796 nx_upipe_na_remove(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
797 {
798 	uint32_t n;
799 	n = --parent->na_next_pipe;
800 	if (n != na->pna_parent_slot) {
801 		struct nexus_upipe_adapter **p =
802 		    &parent->na_pipes[na->pna_parent_slot];
803 		*p = parent->na_pipes[n];
804 		(*p)->pna_parent_slot = na->pna_parent_slot;
805 	}
806 	parent->na_pipes[n] = NULL;
807 }
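
/*
 * Illustrative sketch (example only, not compiled): the removal above
 * is the O(1) "swap with last" idiom for unordered arrays -- the last
 * entry moves into the vacated slot and its back index
 * (pna_parent_slot) is patched.  A minimal generic analogue, with
 * hypothetical types:
 */
#if 0
struct elem {
	uint32_t slot;			/* index of this element in arr */
};

static void
swap_remove(struct elem **arr, uint32_t *count, struct elem *e)
{
	uint32_t last = --(*count);

	if (last != e->slot) {
		arr[e->slot] = arr[last];	/* move last into hole */
		arr[e->slot]->slot = e->slot;	/* fix its back index */
	}
	arr[last] = NULL;
}
#endif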
808 
809 static int
810 nx_upipe_na_txsync(struct __kern_channel_ring *txkring, struct proc *p,
811     uint32_t flags)
812 {
813 	struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
814 	volatile uint64_t *tx_tsync, *tx_tnote, *rx_tsync;
815 	int sent = 0, ret = 0;
816 
817 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
818 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
819 	    "flags 0x%x -> kr \"%s\" (0x%llx) krflags 0x%b ring %u",
820 	    sk_proc_name_address(p), sk_proc_pid(p), txkring->ckr_name,
821 	    SK_KVA(txkring), txkring->ckr_flags, CKRF_BITS,
822 	    txkring->ckr_ring_id, flags, rxkring->ckr_name, SK_KVA(rxkring),
823 	    rxkring->ckr_flags, CKRF_BITS, rxkring->ckr_ring_id);
824 
825 	/*
826 	 * Serialize write access to the transmit ring, since another
827 	 * thread coming down for rxsync might pick up pending slots.
828 	 */
829 	ASSERT(txkring->ckr_owner == current_thread());
830 
831 	/*
832 	 * Record the time of sync and grab sync time of other side;
833 	 * use atomic store and load since we're not holding the
834 	 * lock used by the receive ring.  This allows us to avoid
835 	 * the potentially costly membar_sync().
836 	 */
837 	/* deconst */
838 	tx_tsync = __DECONST(uint64_t *, &txkring->ckr_ring->ring_sync_time);
839 	atomic_set_64(tx_tsync, txkring->ckr_sync_time);
840 
841 	/*
842 	 * Read from the peer's kring, not its user ring; the peer's channel
843 	 * may be defunct, in which case it's unsafe to access its user ring.
844 	 */
845 	rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_sync_time);
846 	tx_tnote = __DECONST(uint64_t *, &txkring->ckr_ring->ring_notify_time);
847 	*tx_tnote = atomic_add_64_ov(rx_tsync, 0);
848 
849 	if (__probable(txkring->ckr_rhead != txkring->ckr_khead)) {
850 		sent = nx_upipe_na_txsync_locked(txkring, p, flags,
851 		    &ret, FALSE);
852 	}
853 
854 	if (sent != 0) {
855 		(void) rxkring->ckr_na_notify(rxkring, p, 0);
856 	}
857 
858 	return ret;
859 }
860 
861 int
862 nx_upipe_na_txsync_locked(struct __kern_channel_ring *txkring, struct proc *p,
863     uint32_t flags, int *ret, boolean_t rx)
864 {
865 #pragma unused(p, flags, rx)
866 	struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
867 	const slot_idx_t lim_tx = txkring->ckr_lim;
868 	const slot_idx_t lim_rx = rxkring->ckr_lim;
869 	slot_idx_t j, k;
870 	int n, m, b, sent = 0;
871 	uint32_t byte_count = 0;
872 	int limit; /* max # of slots to transfer */
873 
874 	*ret = 0;
875 
876 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
877 	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
878 	    "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
879 	    sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
880 	    txkring->ckr_ktail, txkring->ckr_rhead,
881 	    txkring->ckr_rtail, rx ? "*" : "");
882 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
883 	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
884 	    "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
885 	    sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
886 	    rxkring->ckr_ktail, rxkring->ckr_rhead,
887 	    rxkring->ckr_rtail, rx ? "*" : "");
888 
889 	if (__improbable(KR_DROP(txkring) || KR_DROP(rxkring))) {
890 		*ret = ENXIO;
891 		goto done;
892 	}
893 
894 	j = rxkring->ckr_ktail; /* RX */
895 	k = txkring->ckr_khead;  /* TX */
896 
897 	/* # of new tx slots */
898 	n = txkring->ckr_rhead - txkring->ckr_khead;
899 	if (n < 0) {
900 		n += txkring->ckr_num_slots;
901 	}
902 	limit = n;
903 
904 	/* # of rx busy (unclaimed) slots */
905 	b = j - rxkring->ckr_khead;
906 	if (b < 0) {
907 		b += rxkring->ckr_num_slots;
908 	}
909 
910 	/* # of rx avail free slots (subtract busy from max) */
911 	m = lim_rx - b;
912 	if (m < limit) {
913 		limit = m;
914 	}
915 
916 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
917 	    "%s(%d) kr \"%s\" -> new %u, kr \"%s\" "
918 	    "-> free %u", sk_proc_name_address(p), sk_proc_pid(p),
919 	    txkring->ckr_name, n, rxkring->ckr_name, m);
920 
921 	/* rxring is full, or nothing to send? */
922 	if (__improbable((sent = limit) == 0)) {
923 		SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
924 		    "%s(%d) kr \"%s\" -> %s%s",
925 		    sk_proc_name_address(p), sk_proc_pid(p), (n > m) ?
926 		    rxkring->ckr_name : txkring->ckr_name, ((n > m) ?
927 		    "no room avail" : "no new slots"),
928 		    (rx ? " (lost race, ok)" : ""));
929 		goto done;
930 	}
931 
932 	ASSERT(limit > 0);
933 	while (limit--) {
934 		struct __kern_slot_desc *ksd_tx = KR_KSD(txkring, k);
935 		struct __user_slot_desc *usd_tx = KR_USD(txkring, k);
936 		struct __kern_slot_desc *ksd_rx = KR_KSD(rxkring, j);
937 		struct __user_slot_desc *usd_rx = KR_USD(rxkring, j);
938 		struct __kern_quantum *kqum;
939 
940 		kqum = ksd_tx->sd_qum;
941 		/*
942 		 * Packets failing internalization should be dropped in
943 		 * TX sync prologue.
944 		 */
945 		ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED |
946 		    QUM_F_FINALIZED)) == (QUM_F_INTERNALIZED |
947 		    QUM_F_FINALIZED));
948 
949 		byte_count += kqum->qum_len;
950 
951 		/*
952 		 * Swap the slots.
953 		 *
954 		 * XXX: [email protected] -- this bypasses the slot attach/detach
955 		 * interface, and needs to be changed when upipe adopts the
956 		 * packet APIs.  SD_SWAP() will perform a block copy of the
957 		 * swap, and will readjust the kernel slot descriptor's sd_user
958 		 * accordingly.
959 		 */
960 		SD_SWAP(ksd_rx, usd_rx, ksd_tx, usd_tx);
961 
962 		j = SLOT_NEXT(j, lim_rx);
963 		k = SLOT_NEXT(k, lim_tx);
964 	}
965 
966 	kr_update_stats(rxkring, sent, byte_count);
967 	if (__improbable(kr_stat_enable != 0)) {
968 		txkring->ckr_stats = rxkring->ckr_stats;
969 	}
970 
971 	/*
972 	 * Make sure the slots are updated before ckr_ktail reaches global
973 	 * visibility, since we are not holding rx ring's kr_enter().
974 	 */
975 	membar_sync();
976 
977 	rxkring->ckr_ktail = j;
978 	txkring->ckr_khead = k;
979 	txkring->ckr_ktail = SLOT_PREV(k, lim_tx);
980 
981 done:
982 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
983 	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
984 	    "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
985 	    sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
986 	    txkring->ckr_ktail, txkring->ckr_rhead,
987 	    txkring->ckr_rtail, rx ? "*" : "");
988 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
989 	    "%s(%d) kr \"%s\", kh %3u kt %3u | "
990 	    "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
991 	    sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
992 	    rxkring->ckr_ktail, rxkring->ckr_rhead,
993 	    rxkring->ckr_rtail, rx ? "*" : "");
994 
995 	return sent;
996 }
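
/*
 * Worked example (illustrative): the slot accounting above is plain
 * circular-ring arithmetic.  With hypothetical values num_slots = 128,
 * rhead = 5, khead = 120:
 *
 *	n = 5 - 120 = -115;  n += 128  ->  13 new TX slots
 *
 * despite the wraparound.  The same pattern counts busy RX slots, and
 * the transfer limit is min(new TX slots, lim_rx - busy RX slots).
 */
#if 0
static int
ring_distance(uint32_t from, uint32_t to, uint32_t num_slots)
{
	int n = (int)to - (int)from;	/* may be negative across wrap */

	if (n < 0) {
		n += num_slots;		/* fold back into [0, num_slots) */
	}
	return n;
}
#endif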
997 
998 static int
999 nx_upipe_na_rxsync(struct __kern_channel_ring *rxkring, struct proc *p,
1000     uint32_t flags)
1001 {
1002 #pragma unused(p)
1003 	struct __kern_channel_ring *txkring = rxkring->ckr_pipe;
1004 	volatile uint64_t *rx_tsync, *rx_tnote, *tx_tsync;
1005 	const slot_idx_t lim_rx = rxkring->ckr_lim;
1006 	int n; /* new slots from transmit side */
1007 	int m, b, ret = 0;
1008 	uint32_t r;
1009 
1010 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
1011 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
1012 	    "flags 0x%x <- kr \"%s\" (0x%llx) krflags 0x%b ring %u",
1013 	    sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
1014 	    SK_KVA(rxkring), rxkring->ckr_flags, CKRF_BITS,
1015 	    rxkring->ckr_ring_id, flags, txkring->ckr_name, SK_KVA(txkring),
1016 	    txkring->ckr_flags, CKRF_BITS, txkring->ckr_ring_id);
1017 
1018 	ASSERT(rxkring->ckr_owner == current_thread());
1019 
1020 	/* reclaim and get # of rx reclaimed slots */
1021 	r = kr_reclaim(rxkring);
1022 
1023 	/* # of rx busy (unclaimed) slots */
1024 	b = rxkring->ckr_ktail - rxkring->ckr_khead;
1025 	if (b < 0) {
1026 		b += rxkring->ckr_num_slots;
1027 	}
1028 
1029 	/* # of rx avail free slots (subtract busy from max) */
1030 	m = lim_rx - b;
1031 
1032 	/*
1033 	 * Check if there's any new slots on transmit ring; do this
1034 	 * first without acquiring that ring's ckr_qlock, and use
1035 	 * the memory barrier (paired with second one in txsync.)
1036 	 * the memory barrier (paired with the second one in txsync).
1037 	 * ckr_qlock and potentially returning from "internal txsync"
1038 	 * without anything to process, which is okay.
1039 	 */
1040 	membar_sync();
1041 	n = txkring->ckr_rhead - txkring->ckr_khead;
1042 	if (n < 0) {
1043 		n += txkring->ckr_num_slots;
1044 	}
1045 
1046 	SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
1047 	    "%s(%d) kr \"%s\" <- free %u, kr \"%s\" <- new %u",
1048 	    sk_proc_name_address(p), sk_proc_pid(p),
1049 	    rxkring->ckr_name, m, txkring->ckr_name, n);
1050 
1051 	/*
1052 	 * Record the time of sync and grab sync time of other side;
1053 	 * use atomic store and load since we're not holding the
1054 	 * lock used by the receive ring.  This allows us to avoid
1055 	 * the potentially costly membar_sync().
1056 	 */
1057 	/* deconst */
1058 	rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_sync_time);
1059 	atomic_set_64(rx_tsync, rxkring->ckr_sync_time);
1060 
1061 	/*
1062 	 * Read from the peer's kring, not its user ring; the peer's channel
1063 	 * may be defunct, in which case it's unsafe to access its user ring.
1064 	 */
1065 	tx_tsync = __DECONST(uint64_t *, &txkring->ckr_sync_time);
1066 	rx_tnote = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_notify_time);
1067 	*rx_tnote = atomic_add_64_ov(tx_tsync, 0);
1068 
1069 	/*
1070 	 * If we have slots to pick up from the transmit side and we
1071 	 * have space available, perform an equivalent of "internal txsync".
1072 	 *
1073 	 * Acquire and serialize write access to the transmit (peer)
1074 	 * ring, since another thread coming down for txsync might add
1075 	 * new slots.
1076 	 * If we fail to get the kring lock, then don't worry because
1077 	 * there's already a transmit sync in progress to move packets.
1078 	 */
1079 	if (__probable(n != 0 && m != 0 && (flags & NA_SYNCF_MONITOR) == 0)) {
1080 		(void) kr_enter(txkring, TRUE);
1081 		n = nx_upipe_na_txsync_locked(txkring, p, flags, &ret, TRUE);
1082 		kr_exit(txkring);
1083 	} else {
1084 		n = 0;
1085 	}
1086 
1087 	/*
1088 	 * If we have reclaimed some slots or transferred new slots
1089 	 * from the transmit side, notify the other end.  Also notify
1090 	 * ourselves to pick up newly transferred ones, if any.
1091 	 */
1092 	if (__probable(r != 0 || n != 0)) {
1093 		SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
1094 		    "%s(%d) kr \"%s\", kh %3u kt %3u | "
1095 		    "rh %3u rt %3u [rel %u new %u]",
1096 		    sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
1097 		    rxkring->ckr_khead, rxkring->ckr_ktail,
1098 		    rxkring->ckr_rhead, rxkring->ckr_rtail, r, n);
1099 
1100 		(void) txkring->ckr_na_notify(txkring, p, 0);
1101 	}
1102 
1103 	return ret;
1104 }
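
/*
 * Memory-ordering note (illustrative): per the comments above, the
 * membar_sync() here pairs with the one in nx_upipe_na_txsync_locked().
 * The producer publishes in the order "swap slots, barrier, advance
 * ckr_ktail"; the consumer issues its barrier before loading the
 * producer's indices.  A consumer that observes the advanced index is
 * therefore guaranteed to observe the swapped slot contents as well --
 * the standard publish/consume pairing, used here to avoid taking the
 * peer ring's lock on the fast path.
 */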
1105 
1106 static int
1107 nx_upipe_na_rings_create(struct nexus_adapter *na, struct kern_channel *ch)
1108 {
1109 	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1110 	struct nexus_adapter *ona = &pna->pna_peer->pna_up;
1111 	int error = 0;
1112 	enum txrx t;
1113 	uint32_t i;
1114 
1115 	/*
1116 	 * Create krings and all the rings for this end;
1117 	 * we'll update ckr_save_ring pointers below.
1118 	 */
1119 	error = na_rings_mem_setup(na, 0, FALSE, ch);
1120 	if (error != 0) {
1121 		goto err;
1122 	}
1123 
1124 	/* update our hidden ring pointers */
1125 	for_rx_tx(t) {
1126 		for (i = 0; i < na_get_nrings(na, t); i++) {
1127 			NAKR(na, t)[i].ckr_save_ring =
1128 			    NAKR(na, t)[i].ckr_ring;
1129 		}
1130 	}
1131 
1132 	/* now, create krings and rings of the other end */
1133 	error = na_rings_mem_setup(ona, 0, FALSE, ch);
1134 	if (error != 0) {
1135 		na_rings_mem_teardown(na, ch, FALSE);   /* this end */
1136 		goto err;
1137 	}
1138 
1139 	for_rx_tx(t) {
1140 		for (i = 0; i < na_get_nrings(ona, t); i++) {
1141 			NAKR(ona, t)[i].ckr_save_ring =
1142 			    NAKR(ona, t)[i].ckr_ring;
1143 		}
1144 	}
1145 
1146 	/* cross link the krings */
1147 	for_rx_tx(t) {
1148 		/* swap NR_TX <-> NR_RX (skip host ring) */
1149 		enum txrx r = sk_txrx_swap(t);
1150 		for (i = 0; i < na_get_nrings(na, t); i++) {
1151 			NAKR(na, t)[i].ckr_pipe =
1152 			    NAKR(&pna->pna_peer->pna_up, r) + i;
1153 			NAKR(&pna->pna_peer->pna_up, r)[i].ckr_pipe =
1154 			    NAKR(na, t) + i;
1155 		}
1156 	}
1157 err:
1158 	return error;
1159 }
1160 
1161 /*
1162  * Pipe endpoints are created and destroyed together, so that endpoints do not
1163  * have to check for the existence of their peer at each ?xsync.
1164  *
1165  * To play well with the existing nexus adapter infrastructure (refcounts etc.),
1166  * we adopt the following strategy:
1167  *
1168  * 1) The first endpoint that is created also creates the other endpoint and
1169  * grabs a reference to it.
1170  *
1171  *    state A)  user1 --> endpoint1 --> endpoint2
1172  *
1173  * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
1174  * its reference to the user:
1175  *
1176  *    state B)  user1 --> endpoint1     endpoint2 <--- user2
1177  *
1178  * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
1179  * callback endpoint2 notes that endpoint1 is still active and adds a reference
1180  * from endpoint1 to itself. When user2 then releases her own reference,
1181  * endpoint2 is not destroyed and we are back to state A. A symmetrical state
1182  * would be reached if endpoint1 were released instead.
1183  *
1184  * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
1185  * it owns a reference to endpoint2 and releases it.
1186  *
1187  * Something similar goes on for the creation and destruction of the krings.
1188  */
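
/*
 * Reference-state sketch (illustrative reading of the scheme above,
 * with pna_peer_ref as a per-endpoint flag):
 *
 *	state A:  e1.pna_peer_ref == TRUE	(only e1 registered;
 *						 e1 keeps e2 alive)
 *	state B:  neither flag set		(both registered; each
 *						 user holds its own ref)
 *
 * Registering the second endpoint moves A -> B (step 2); closing one
 * of two registered endpoints moves B -> A with roles swapped (step
 * 3); destroying an endpoint whose flag is set tears down its peer as
 * well (step 4).
 */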
1189 
1190 
1191 /*
1192  * nx_upipe_na_krings_create.
1193  *
1194  * There are two cases:
1195  *
1196  * 1) state is
1197  *
1198  *        usr1 --> e1 --> e2
1199  *
1200  *    and we are e1. We have to create both sets
1201  *    of krings.
1202  *
1203  * 2) state is
1204  *
1205  *        usr1 --> e1 --> e2
1206  *
1207  *    and we are e2. e1 is certainly registered and our
1208  *    krings already exist, but they may be hidden.
1209  */
1210 static int
1211 nx_upipe_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
1212 {
1213 	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1214 	int error = 0;
1215 	enum txrx t;
1216 	uint32_t i;
1217 
1218 	/*
1219 	 * Verify symmetrical ring counts; validated
1220 	 * at nexus provider registration time.
1221 	 */
1222 	ASSERT(na_get_nrings(na, NR_TX) == na_get_nrings(na, NR_RX));
1223 
1224 	if (pna->pna_peer_ref) {
1225 		/* case 1) above */
1226 		SK_DF(SK_VERB_USER_PIPE,
1227 		    "0x%llx: case 1, create everything", SK_KVA(na));
1228 		error = nx_upipe_na_rings_create(na, ch);
1229 	} else {
1230 		/* case 2) above */
1231 		/* recover the hidden rings */
1232 		SK_DF(SK_VERB_USER_PIPE,
1233 		    "0x%llx: case 2, hidden rings", SK_KVA(na));
1234 		for_rx_tx(t) {
1235 			for (i = 0; i < na_get_nrings(na, t); i++) {
1236 				NAKR(na, t)[i].ckr_ring =
1237 				    NAKR(na, t)[i].ckr_save_ring;
1238 			}
1239 		}
1240 	}
1241 
1242 	ASSERT(error == 0 || (na->na_tx_rings == NULL &&
1243 	    na->na_rx_rings == NULL && na->na_slot_ctxs == NULL));
1244 	ASSERT(error == 0 || (pna->pna_peer->pna_up.na_tx_rings == NULL &&
1245 	    pna->pna_peer->pna_up.na_rx_rings == NULL &&
1246 	    pna->pna_peer->pna_up.na_slot_ctxs == NULL));
1247 
1248 	return error;
1249 }
1250 
1251 /*
1252  * nx_upipe_na_activate.
1253  *
1254  * There are two cases on registration (onoff==1)
1255  *
1256  * 1.a) state is
1257  *
1258  *        usr1 --> e1 --> e2
1259  *
1260  *      and we are e1. Nothing special to do.
1261  *
1262  * 1.b) state is
1263  *
1264  *        usr1 --> e1 --> e2 <-- usr2
1265  *
1266  *      and we are e2. Drop the ref e1 is holding.
1267  *
1268  *  There are two additional cases on unregister (onoff==0)
1269  *
1270  *  2.a) state is
1271  *
1272  *         usr1 --> e1 --> e2
1273  *
1274  *       and we are e1. Nothing special to do, e2 will
1275  *       be cleaned up by the destructor of e1.
1276  *
1277  *  2.b) state is
1278  *
1279  *         usr1 --> e1     e2 <-- usr2
1280  *
1281  *       and we are either e1 or e2. Add a ref from the
1282  *       other end and hide our rings.
1283  */
1284 static int
1285 nx_upipe_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1286 {
1287 	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1288 
1289 	SK_LOCK_ASSERT_HELD();
1290 
1291 	SK_DF(SK_VERB_USER_PIPE, "na \"%s\" (0x%llx) %s", na->na_name,
1292 	    SK_KVA(na), na_activate_mode2str(mode));
1293 
1294 	switch (mode) {
1295 	case NA_ACTIVATE_MODE_ON:
1296 		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
1297 		break;
1298 
1299 	case NA_ACTIVATE_MODE_DEFUNCT:
1300 		break;
1301 
1302 	case NA_ACTIVATE_MODE_OFF:
1303 		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
1304 		break;
1305 
1306 	default:
1307 		VERIFY(0);
1308 		/* NOTREACHED */
1309 		__builtin_unreachable();
1310 	}
1311 
1312 	if (pna->pna_peer_ref) {
1313 		SK_DF(SK_VERB_USER_PIPE,
1314 		    "0x%llx: case 1.a or 2.a, nothing to do", SK_KVA(na));
1315 		return 0;
1316 	}
1317 
1318 	switch (mode) {
1319 	case NA_ACTIVATE_MODE_ON:
1320 		SK_DF(SK_VERB_USER_PIPE,
1321 		    "0x%llx: case 1.b, drop peer", SK_KVA(na));
1322 		if (pna->pna_peer->pna_peer_ref) {
1323 			pna->pna_peer->pna_peer_ref = FALSE;
1324 			(void) na_release_locked(na);
1325 		}
1326 		break;
1327 
1328 	case NA_ACTIVATE_MODE_OFF:
1329 		SK_DF(SK_VERB_USER_PIPE,
1330 		    "0x%llx: case 2.b, grab peer", SK_KVA(na));
1331 		if (!pna->pna_peer->pna_peer_ref) {
1332 			na_retain_locked(na);
1333 			pna->pna_peer->pna_peer_ref = TRUE;
1334 		}
1335 		break;
1336 
1337 	default:
1338 		break;
1339 	}
1340 
1341 	return 0;
1342 }
1343 
1344 /*
1345  * nx_upipe_na_krings_delete.
1346  *
1347  * There are two cases:
1348  *
1349  * 1) state is
1350  *
1351  *                usr1 --> e1 --> e2
1352  *
1353  *    and we are e1 (e2 is not bound, so krings_delete cannot be
1354  *    called on it);
1355  *
1356  * 2) state is
1357  *
1358  *                usr1 --> e1     e2 <-- usr2
1359  *
1360  *    and we are either e1 or e2.
1361  *
1362  * In the former case we have to also delete the krings of e2;
1363  * in the latter case we do nothing (note that our krings
1364  * have already been hidden in the unregister callback).
1365  */
1366 static void
1367 nx_upipe_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
1368     boolean_t defunct)
1369 {
1370 	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1371 	struct nexus_adapter *ona; /* na of the other end */
1372 	uint32_t i;
1373 	enum txrx t;
1374 
1375 	SK_LOCK_ASSERT_HELD();
1376 
1377 	if (!pna->pna_peer_ref) {
1378 		SK_DF(SK_VERB_USER_PIPE,
1379 		    "0x%llx: case 2, kept alive by peer", SK_KVA(na));
1380 		/*
1381 		 * If adapter is defunct (note the explicit test against
1382 		 * NAF_DEFUNCT, and not the "defunct" parameter passed in
1383 		 * by the caller), then the peer's channel has gone defunct.
1384 		 * We get here because this channel was not defuncted, and
1385 		 * that this is the last active reference to the adapter.
1386 		 * At this point we tear everything down, since the caller
1387 		 * will proceed to destroying the memory regions.
1388 		 */
1389 		if (na->na_flags & NAF_DEFUNCT) {
1390 			na_rings_mem_teardown(na, ch, defunct);
1391 		}
1392 		return;
1393 	}
1394 
1395 	/* case 1) above */
1396 	SK_DF(SK_VERB_USER_PIPE,
1397 	    "0x%llx: case 1, deleting everything", SK_KVA(na));
1398 
1399 	ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
1400 
1401 	/* restore the ring to be deleted on the peer */
1402 	ona = &pna->pna_peer->pna_up;
1403 	if (ona->na_tx_rings == NULL) {
1404 		/*
1405 		 * Already deleted; we must be on a
1406 		 * cleanup-after-error path.
1407 		 * Just delete this end.
1408 		 */
1409 		na_rings_mem_teardown(na, ch, defunct);
1410 		return;
1411 	}
1412 
1413 	/* delete the memory rings */
1414 	na_rings_mem_teardown(na, ch, defunct);
1415 
1416 	if (!defunct) {
1417 		for_rx_tx(t) {
1418 			for (i = 0; i < na_get_nrings(ona, t); i++) {
1419 				NAKR(ona, t)[i].ckr_ring =
1420 				    NAKR(ona, t)[i].ckr_save_ring;
1421 			}
1422 		}
1423 	}
1424 
1425 	/* delete the memory rings of the other end */
1426 	na_rings_mem_teardown(ona, ch, defunct);
1427 }
1428 
1429 static void
1430 nx_upipe_na_dtor(struct nexus_adapter *na)
1431 {
1432 	struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1433 	struct nx_upipe *u = NX_UPIPE_PRIVATE(na->na_nx);
1434 
1435 	SK_LOCK_ASSERT_HELD();
1436 
1437 	SK_DF(SK_VERB_USER_PIPE, "0x%llx", SK_KVA(na));
1438 	if (pna->pna_peer_ref) {
1439 		SK_DF(SK_VERB_USER_PIPE,
1440 		    "0x%llx: clean up peer 0x%llx", SK_KVA(na),
1441 		    SK_KVA(&pna->pna_peer->pna_up));
1442 		pna->pna_peer_ref = FALSE;
1443 		(void) na_release_locked(&pna->pna_peer->pna_up);
1444 	}
1445 	if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
1446 		nx_upipe_na_remove(pna->pna_parent, pna);
1447 	}
1448 	(void) na_release_locked(pna->pna_parent);
1449 	pna->pna_parent = NULL;
1450 
1451 	/* release reference to parent adapter held by nx_upipe_na_find() */
1452 	ASSERT(u->nup_pna_users != 0);
1453 	if (--u->nup_pna_users == 0) {
1454 		ASSERT(u->nup_pna != NULL);
1455 		SK_DF(SK_VERB_USER_PIPE, "release parent: \"%s\" (0x%llx)",
1456 		    u->nup_pna->na_name, SK_KVA(u->nup_pna));
1457 		na_release_locked(u->nup_pna);
1458 		u->nup_pna = NULL;
1459 	}
1460 }
1461 
1462 int
1463 nx_upipe_na_find(struct kern_nexus *nx, struct kern_channel *ch,
1464     struct chreq *chr, struct nxbind *nxb, struct proc *p,
1465     struct nexus_adapter **na, boolean_t create)
1466 {
1467 #pragma unused(ch, p)
1468 	struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
1469 	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
1470 	struct nexus_adapter *pna = NULL; /* parent adapter */
1471 	boolean_t anon = NX_ANONYMOUS_PROV(nx);
1472 	struct nexus_upipe_adapter *mna, *sna, *req;
1473 	ch_endpoint_t ep = chr->cr_endpoint;
1474 	uint32_t pipe_id;
1475 	int error;
1476 
1477 	SK_LOCK_ASSERT_HELD();
1478 	*na = NULL;
1479 
1480 #if SK_LOG
1481 	uuid_string_t uuidstr;
1482 	SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
1483 	    "ring_id %d ring_set %u ep_type %u:%u create %u%s",
1484 	    chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
1485 	    (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
1486 	    chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
1487 	    chr->cr_real_endpoint, chr->cr_endpoint, create,
1488 	    (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
1489 	    ep != CH_ENDPOINT_USER_PIPE_SLAVE) ? " (skipped)" : "");
1490 #endif /* SK_LOG */
1491 
1492 	if (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
1493 	    ep != CH_ENDPOINT_USER_PIPE_SLAVE) {
1494 		return 0;
1495 	}
1496 
1497 	/*
1498 	 * Check client credentials.
1499 	 */
1500 	if (chr->cr_port == NEXUS_PORT_USER_PIPE_SERVER) {
1501 		if (!anon && (u->nup_srv_nxb == NULL || nxb == NULL ||
1502 		    !nxb_is_equal(u->nup_srv_nxb, nxb))) {
1503 			return EACCES;
1504 		}
1505 	} else {
1506 		ASSERT(chr->cr_port == NEXUS_PORT_USER_PIPE_CLIENT);
1507 		if (!anon && (u->nup_cli_nxb == NULL || nxb == NULL ||
1508 		    !nxb_is_equal(u->nup_cli_nxb, nxb))) {
1509 			return EACCES;
1510 		}
1511 	}
1512 
1513 	/*
1514 	 * First, try to find a previously-created parent adapter
1515 	 * for this nexus; else, create one and store it in the
1516 	 * nexus.  We'll release this at nexus destructor time.
1517 	 */
1518 	if ((pna = u->nup_pna) != NULL) {
1519 		na_retain_locked(pna);  /* for us */
1520 		SK_DF(SK_VERB_USER_PIPE, "found parent: \"%s\" (0x%llx)",
1521 		    pna->na_name, SK_KVA(pna));
1522 	} else {
1523 		/* callee will hold a reference for us upon success */
1524 		error = na_pseudo_create(nx, chr, &pna);
1525 		if (error != 0) {
1526 			SK_ERR("parent create failed: %d", error);
1527 			return error;
1528 		}
1529 		/* hold an extra reference for nx_upipe */
1530 		u->nup_pna = pna;
1531 		na_retain_locked(pna);
1532 		SK_DF(SK_VERB_USER_PIPE, "created parent: \"%s\" (0x%llx)",
1533 		    pna->na_name, SK_KVA(pna));
1534 	}
1535 
1536 	/* next, lookup the pipe id in the parent list */
1537 	req = NULL;
1538 	pipe_id = chr->cr_pipe_id;
1539 	mna = nx_upipe_find(pna, pipe_id);
1540 	if (mna != NULL) {
1541 		if (mna->pna_role == ep) {
1542 			SK_DF(SK_VERB_USER_PIPE,
1543 			    "found pipe_id %u directly at slot %u",
1544 			    pipe_id, mna->pna_parent_slot);
1545 			req = mna;
1546 		} else {
1547 			SK_DF(SK_VERB_USER_PIPE,
1548 			    "found pipe_id %u indirectly at slot %u",
1549 			    pipe_id, mna->pna_parent_slot);
1550 			req = mna->pna_peer;
1551 		}
1552 		/*
1553 		 * The pipe we have found already holds a ref to the parent,
1554 		 * so we need to drop the one we got from above.
1555 		 */
1556 		(void) na_release_locked(pna);
1557 		goto found;
1558 	}
1559 	SK_DF(SK_VERB_USER_PIPE,
1560 	    "pipe_id %u not found, create %u", pipe_id, create);
1561 	if (!create) {
1562 		error = ENODEV;
1563 		goto put_out;
1564 	}
1565 	/*
1566 	 * We create both master and slave.
1567 	 * The endpoint we were asked for holds a reference to
1568 	 * the other one.
1569 	 */
1570 	mna = na_upipe_alloc(Z_WAITOK);
1571 
1572 	ASSERT(mna->pna_up.na_type == NA_USER_PIPE);
1573 	ASSERT(mna->pna_up.na_free == na_upipe_free);
1574 
1575 	(void) snprintf(mna->pna_up.na_name, sizeof(mna->pna_up.na_name),
1576 	    "%s{%u", pna->na_name, pipe_id);
1577 	uuid_generate_random(mna->pna_up.na_uuid);
1578 
1579 	mna->pna_id = pipe_id;
1580 	mna->pna_role = CH_ENDPOINT_USER_PIPE_MASTER;
1581 	mna->pna_parent = pna;
1582 	mna->pna_up.na_txsync = nx_upipe_na_txsync;
1583 	mna->pna_up.na_rxsync = nx_upipe_na_rxsync;
1584 	mna->pna_up.na_activate = nx_upipe_na_activate;
1585 	mna->pna_up.na_dtor = nx_upipe_na_dtor;
1586 	mna->pna_up.na_krings_create = nx_upipe_na_krings_create;
1587 	mna->pna_up.na_krings_delete = nx_upipe_na_krings_delete;
1588 	mna->pna_up.na_arena = pna->na_arena;
1589 	skmem_arena_retain((&mna->pna_up)->na_arena);
1590 	atomic_bitset_32(&mna->pna_up.na_flags, NAF_MEM_LOANED);
1591 	*(nexus_meta_type_t *)(uintptr_t)&mna->pna_up.na_md_type =
1592 	    pna->na_md_type;
1593 	*(nexus_meta_subtype_t *)(uintptr_t)&mna->pna_up.na_md_subtype =
1594 	    pna->na_md_subtype;
1595 
1596 	*(nexus_stats_type_t *)(uintptr_t)&mna->pna_up.na_stats_type =
1597 	    NEXUS_STATS_TYPE_INVALID;
1598 	*(uint32_t *)(uintptr_t)&mna->pna_up.na_flowadv_max =
1599 	    nxp->nxp_flowadv_max;
1600 	ASSERT(mna->pna_up.na_flowadv_max == 0 ||
1601 	    skmem_arena_nexus(mna->pna_up.na_arena)->arn_flowadv_obj != NULL);
1602 
1603 	/*
1604 	 * Parent adapter parameters must match the nexus provider's by the
1605 	 * time we get here, since na_find() above shouldn't return
1606 	 * one otherwise.
1607 	 */
1608 	na_set_nrings(&mna->pna_up, NR_TX, nxp->nxp_tx_rings);
1609 	na_set_nrings(&mna->pna_up, NR_RX, nxp->nxp_rx_rings);
1610 	na_set_nslots(&mna->pna_up, NR_TX, nxp->nxp_tx_slots);
1611 	na_set_nslots(&mna->pna_up, NR_RX, nxp->nxp_rx_slots);
1612 	ASSERT(na_get_nrings(&mna->pna_up, NR_TX) == na_get_nrings(pna, NR_TX));
1613 	ASSERT(na_get_nrings(&mna->pna_up, NR_RX) == na_get_nrings(pna, NR_RX));
1614 	ASSERT(na_get_nslots(&mna->pna_up, NR_TX) == na_get_nslots(pna, NR_TX));
1615 	ASSERT(na_get_nslots(&mna->pna_up, NR_RX) == na_get_nslots(pna, NR_RX));
1616 
1617 	na_attach_common(&mna->pna_up, nx, &nx_upipe_prov_s);
1618 
1619 	/* register the master with the parent */
1620 	error = nx_upipe_na_add(pna, mna);
1621 	if (error != 0) {
1622 		goto free_mna;
1623 	}
1624 
1625 	/* create the slave */
1626 	sna = na_upipe_alloc(Z_WAITOK);
1627 
1628 	/* most fields are the same, copy from master and then fix */
1629 	bcopy(mna, sna, sizeof(*sna));
1630 	skmem_arena_retain((&sna->pna_up)->na_arena);
1631 	atomic_bitset_32(&sna->pna_up.na_flags, NAF_MEM_LOANED);
1632 
1633 	ASSERT(sna->pna_up.na_type == NA_USER_PIPE);
1634 	ASSERT(sna->pna_up.na_free == na_upipe_free);
1635 
1636 	(void) snprintf(sna->pna_up.na_name, sizeof(sna->pna_up.na_name),
1637 	    "%s}%d", pna->na_name, pipe_id);
1638 	uuid_generate_random(sna->pna_up.na_uuid);
1639 
1640 	sna->pna_role = CH_ENDPOINT_USER_PIPE_SLAVE;
1641 	na_attach_common(&sna->pna_up, nx, &nx_upipe_prov_s);
1642 
1643 	/* join the two endpoints */
1644 	mna->pna_peer = sna;
1645 	sna->pna_peer = mna;
1646 
1647 	/*
1648 	 * We already have a reference to the parent, but we
1649 	 * need another one for the other endpoint we created
1650 	 */
1651 	na_retain_locked(pna);
1652 
1653 	if ((chr->cr_mode & CHMODE_DEFUNCT_OK) != 0) {
1654 		atomic_bitset_32(&pna->na_flags, NAF_DEFUNCT_OK);
1655 	}
1656 
1657 	if (ep == CH_ENDPOINT_USER_PIPE_MASTER) {
1658 		req = mna;
1659 		mna->pna_peer_ref = TRUE;
1660 		na_retain_locked(&sna->pna_up);
1661 	} else {
1662 		req = sna;
1663 		sna->pna_peer_ref = TRUE;
1664 		na_retain_locked(&mna->pna_up);
1665 	}
1666 
1667 	/* parent adapter now has two users (mna and sna) */
1668 	u->nup_pna_users += 2;
1669 
1670 #if SK_LOG
1671 	SK_DF(SK_VERB_USER_PIPE, "created master 0x%llx and slave 0x%llx",
1672 	    SK_KVA(mna), SK_KVA(sna));
1673 	SK_DF(SK_VERB_USER_PIPE, "mna: \"%s\"", mna->pna_up.na_name);
1674 	SK_DF(SK_VERB_USER_PIPE, "  UUID:        %s",
1675 	    sk_uuid_unparse(mna->pna_up.na_uuid, uuidstr));
1676 	SK_DF(SK_VERB_USER_PIPE, "  nx:          0x%llx (\"%s\":\"%s\")",
1677 	    SK_KVA(mna->pna_up.na_nx), NX_DOM(mna->pna_up.na_nx)->nxdom_name,
1678 	    NX_DOM_PROV(mna->pna_up.na_nx)->nxdom_prov_name);
1679 	SK_DF(SK_VERB_USER_PIPE, "  flags:       0x%b",
1680 	    mna->pna_up.na_flags, NAF_BITS);
1681 	SK_DF(SK_VERB_USER_PIPE, "  flowadv_max: %u",
1682 	    mna->pna_up.na_flowadv_max);
1683 	SK_DF(SK_VERB_USER_PIPE, "  rings:       tx %u rx %u",
1684 	    na_get_nrings(&mna->pna_up, NR_TX),
1685 	    na_get_nrings(&mna->pna_up, NR_RX));
1686 	SK_DF(SK_VERB_USER_PIPE, "  slots:       tx %u rx %u",
1687 	    na_get_nslots(&mna->pna_up, NR_TX),
1688 	    na_get_nslots(&mna->pna_up, NR_RX));
1689 	SK_DF(SK_VERB_USER_PIPE, "  next_pipe:   %u", mna->pna_up.na_next_pipe);
1690 	SK_DF(SK_VERB_USER_PIPE, "  max_pipes:   %u", mna->pna_up.na_max_pipes);
1691 	SK_DF(SK_VERB_USER_PIPE, "  parent:      \"%s\"",
1692 	    mna->pna_parent->na_name);
1693 	SK_DF(SK_VERB_USER_PIPE, "  id:          %u", mna->pna_id);
1694 	SK_DF(SK_VERB_USER_PIPE, "  role:        %u", mna->pna_role);
1695 	SK_DF(SK_VERB_USER_PIPE, "  peer_ref:    %u", mna->pna_peer_ref);
1696 	SK_DF(SK_VERB_USER_PIPE, "  parent_slot: %u", mna->pna_parent_slot);
1697 	SK_DF(SK_VERB_USER_PIPE, "sna: \"%s\"", sna->pna_up.na_name);
1698 	SK_DF(SK_VERB_USER_PIPE, "  UUID:        %s",
1699 	    sk_uuid_unparse(sna->pna_up.na_uuid, uuidstr));
1700 	SK_DF(SK_VERB_USER_PIPE, "  nx:          0x%llx (\"%s\":\"%s\")",
1701 	    SK_KVA(sna->pna_up.na_nx), NX_DOM(sna->pna_up.na_nx)->nxdom_name,
1702 	    NX_DOM_PROV(sna->pna_up.na_nx)->nxdom_prov_name);
1703 	SK_DF(SK_VERB_USER_PIPE, "  flags:       0x%b",
1704 	    sna->pna_up.na_flags, NAF_BITS);
1705 	SK_DF(SK_VERB_USER_PIPE, "  flowadv_max: %u",
1706 	    sna->pna_up.na_flowadv_max);
1707 	SK_DF(SK_VERB_USER_PIPE, "  rings:       tx %u rx %u",
1708 	    na_get_nrings(&sna->pna_up, NR_TX),
1709 	    na_get_nrings(&sna->pna_up, NR_RX));
1710 	SK_DF(SK_VERB_USER_PIPE, "  slots:       tx %u rx %u",
1711 	    na_get_nslots(&sna->pna_up, NR_TX),
1712 	    na_get_nslots(&sna->pna_up, NR_RX));
1713 	SK_DF(SK_VERB_USER_PIPE, "  next_pipe:   %u", sna->pna_up.na_next_pipe);
1714 	SK_DF(SK_VERB_USER_PIPE, "  max_pipes:   %u", sna->pna_up.na_max_pipes);
1715 	SK_DF(SK_VERB_USER_PIPE, "  parent:      \"%s\"",
1716 	    sna->pna_parent->na_name);
1717 	SK_DF(SK_VERB_USER_PIPE, "  id:          %u", sna->pna_id);
1718 	SK_DF(SK_VERB_USER_PIPE, "  role:        %u", sna->pna_role);
1719 	SK_DF(SK_VERB_USER_PIPE, "  peer_ref:    %u", sna->pna_peer_ref);
1720 	SK_DF(SK_VERB_USER_PIPE, "  parent_slot: %u", sna->pna_parent_slot);
1721 #endif /* SK_LOG */
1722 
1723 found:
1724 
1725 	SK_DF(SK_VERB_USER_PIPE, "pipe_id %u role %s at 0x%llx", pipe_id,
1726 	    (req->pna_role == CH_ENDPOINT_USER_PIPE_MASTER ?
1727 	    "master" : "slave"), SK_KVA(req));
1728 	if ((chr->cr_mode & CHMODE_DEFUNCT_OK) == 0) {
1729 		atomic_bitclear_32(&pna->na_flags, NAF_DEFUNCT_OK);
1730 	}
1731 	*na = &req->pna_up;
1732 	na_retain_locked(*na);
1733 
1734 	/*
1735 	 * Keep the reference to the parent; it will be released
1736 	 * by the adapter's destructor.
1737 	 */
1738 	return 0;
1739 
1740 free_mna:
1741 	if (mna->pna_up.na_arena != NULL) {
1742 		skmem_arena_release((&mna->pna_up)->na_arena);
1743 		mna->pna_up.na_arena = NULL;
1744 	}
1745 	NA_FREE(&mna->pna_up);
1746 put_out:
1747 	(void) na_release_locked(pna);
1748 	return error;
1749 }
1750 
1751 static struct nx_upipe *
1752 nx_upipe_alloc(zalloc_flags_t how)
1753 {
1754 	struct nx_upipe *u;
1755 
1756 	SK_LOCK_ASSERT_HELD();
1757 
1758 	u = zalloc_flags(nx_upipe_zone, how | Z_ZERO);
1759 	if (u) {
1760 		SK_DF(SK_VERB_MEM, "upipe 0x%llx ALLOC", SK_KVA(u));
1761 	}
1762 	return u;
1763 }
1764 
1765 static void
1766 nx_upipe_free(struct nx_upipe *u)
1767 {
1768 	ASSERT(u->nup_pna == NULL);
1769 	ASSERT(u->nup_pna_users == 0);
1770 	ASSERT(u->nup_cli_nxb == NULL);
1771 	ASSERT(u->nup_srv_nxb == NULL);
1772 
1773 	SK_DF(SK_VERB_MEM, "upipe 0x%llx FREE", SK_KVA(u));
1774 	zfree(nx_upipe_zone, u);
1775 }
1776