1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 */
53
54 #include <skywalk/os_skywalk_private.h>
55 #include <skywalk/nexus/upipe/nx_user_pipe.h>
56
/*
 * Defaults and bounds for the user pipe nexus domain parameters; they
 * populate the nb_def/nb_min/nb_max entries of nx_upipe_dom_s.
 */
#define NX_UPIPE_RINGSIZE       128             /* default slots per ring */
#define NX_UPIPE_MAXRINGS       NX_MAX_NUM_RING_PAIR /* max TX/RX rings */
#define NX_UPIPE_MINSLOTS       2               /* min slots per ring */
#define NX_UPIPE_MAXSLOTS       4096            /* max slots per ring */
#define NX_UPIPE_BUFSIZE        (2 * 1024)      /* default buffer size */
#define NX_UPIPE_MINBUFSIZE     1024            /* min buffer size */
#define NX_UPIPE_MAXBUFSIZE     (16 * 1024)     /* max buffer size */
#define NX_UPIPE_MHINTS         NEXUS_MHINTS_NORMAL /* default memory hints */
65
66 static int nx_upipe_na_alloc(struct nexus_adapter *, uint32_t);
67 static struct nexus_upipe_adapter *nx_upipe_find(struct nexus_adapter *,
68 uint32_t);
69 static int nx_upipe_na_add(struct nexus_adapter *,
70 struct nexus_upipe_adapter *);
71 static void nx_upipe_na_remove(struct nexus_adapter *,
72 struct nexus_upipe_adapter *);
73 static int nx_upipe_na_txsync(struct __kern_channel_ring *,
74 struct proc *, uint32_t);
75 static int nx_upipe_na_txsync_locked(struct __kern_channel_ring *,
76 struct proc *, uint32_t, int *, boolean_t);
77 static int nx_upipe_na_rxsync(struct __kern_channel_ring *,
78 struct proc *, uint32_t);
79 static int nx_upipe_na_krings_create(struct nexus_adapter *,
80 struct kern_channel *);
81 static int nx_upipe_na_activate(struct nexus_adapter *, na_activate_mode_t);
82 static void nx_upipe_na_krings_delete(struct nexus_adapter *,
83 struct kern_channel *, boolean_t);
84 static void nx_upipe_na_dtor(struct nexus_adapter *);
85
86 static void nx_upipe_dom_init(struct nxdom *);
87 static void nx_upipe_dom_terminate(struct nxdom *);
88 static void nx_upipe_dom_fini(struct nxdom *);
89 static int nx_upipe_dom_bind_port(struct kern_nexus *, nexus_port_t *,
90 struct nxbind *, void *);
91 static int nx_upipe_dom_unbind_port(struct kern_nexus *, nexus_port_t);
92 static int nx_upipe_dom_connect(struct kern_nexus_domain_provider *,
93 struct kern_nexus *, struct kern_channel *, struct chreq *,
94 struct kern_channel *, struct nxbind *, struct proc *);
95 static void nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *,
96 struct kern_nexus *, struct kern_channel *);
97 static void nx_upipe_dom_defunct(struct kern_nexus_domain_provider *,
98 struct kern_nexus *, struct kern_channel *, struct proc *);
99 static void nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *,
100 struct kern_nexus *, struct kern_channel *, boolean_t);
101
102 static int nx_upipe_prov_init(struct kern_nexus_domain_provider *);
103 static int nx_upipe_prov_params_adjust(
104 const struct kern_nexus_domain_provider *, const struct nxprov_params *,
105 struct nxprov_adjusted_params *);
106 static int nx_upipe_prov_params(struct kern_nexus_domain_provider *,
107 const uint32_t, const struct nxprov_params *, struct nxprov_params *,
108 struct skmem_region_params[SKMEM_REGIONS]);
109 static int nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *,
110 struct kern_nexus *, struct nexus_adapter *);
111 static void nx_upipe_prov_fini(struct kern_nexus_domain_provider *);
112 static int nx_upipe_prov_nx_ctor(struct kern_nexus *);
113 static void nx_upipe_prov_nx_dtor(struct kern_nexus *);
114
115 static struct nexus_upipe_adapter *na_upipe_alloc(zalloc_flags_t);
116 static void na_upipe_free(struct nexus_adapter *);
117
118 static struct nx_upipe *nx_upipe_alloc(zalloc_flags_t);
119 static void nx_upipe_free(struct nx_upipe *);
120
#if (DEVELOPMENT || DEBUG)
/*
 * Memory usage hints override, settable through the
 * kern.skywalk.upipe.nx_mhints sysctl on DEVELOPMENT/DEBUG kernels.
 * A value of 0 means "no override"; see nx_upipe_prov_params().
 */
static uint32_t nx_upipe_mhints = 0;
SYSCTL_NODE(_kern_skywalk, OID_AUTO, upipe, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk upipe tuning");
SYSCTL_UINT(_kern_skywalk_upipe, OID_AUTO, nx_mhints,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nx_upipe_mhints, 0,
    "upipe nexus memory usage hints");
#endif /* (DEVELOPMENT || DEBUG) */
129
130 struct nxdom nx_upipe_dom_s = {
131 .nxdom_prov_head =
132 STAILQ_HEAD_INITIALIZER(nx_upipe_dom_s.nxdom_prov_head),
133 .nxdom_type = NEXUS_TYPE_USER_PIPE,
134 .nxdom_md_type = NEXUS_META_TYPE_QUANTUM,
135 .nxdom_md_subtype = NEXUS_META_SUBTYPE_PAYLOAD,
136 .nxdom_name = "upipe",
137 .nxdom_ports = {
138 .nb_def = 2,
139 .nb_min = 2,
140 .nb_max = 2,
141 },
142 .nxdom_tx_rings = {
143 .nb_def = 1,
144 .nb_min = 1,
145 .nb_max = NX_UPIPE_MAXRINGS,
146 },
147 .nxdom_rx_rings = {
148 .nb_def = 1,
149 .nb_min = 1,
150 .nb_max = NX_UPIPE_MAXRINGS,
151 },
152 .nxdom_tx_slots = {
153 .nb_def = NX_UPIPE_RINGSIZE,
154 .nb_min = NX_UPIPE_MINSLOTS,
155 .nb_max = NX_UPIPE_MAXSLOTS,
156 },
157 .nxdom_rx_slots = {
158 .nb_def = NX_UPIPE_RINGSIZE,
159 .nb_min = NX_UPIPE_MINSLOTS,
160 .nb_max = NX_UPIPE_MAXSLOTS,
161 },
162 .nxdom_buf_size = {
163 .nb_def = NX_UPIPE_BUFSIZE,
164 .nb_min = NX_UPIPE_MINBUFSIZE,
165 .nb_max = NX_UPIPE_MAXBUFSIZE,
166 },
167 .nxdom_meta_size = {
168 .nb_def = NX_METADATA_OBJ_MIN_SZ,
169 .nb_min = NX_METADATA_OBJ_MIN_SZ,
170 .nb_max = NX_METADATA_USR_MAX_SZ,
171 },
172 .nxdom_stats_size = {
173 .nb_def = 0,
174 .nb_min = 0,
175 .nb_max = NX_STATS_MAX_SZ,
176 },
177 .nxdom_pipes = {
178 .nb_def = 0,
179 .nb_min = 0,
180 .nb_max = NX_UPIPE_MAXPIPES,
181 },
182 .nxdom_mhints = {
183 .nb_def = NX_UPIPE_MHINTS,
184 .nb_min = NEXUS_MHINTS_NORMAL,
185 .nb_max = (NEXUS_MHINTS_NORMAL | NEXUS_MHINTS_WILLNEED |
186 NEXUS_MHINTS_LOWLATENCY | NEXUS_MHINTS_HIUSE),
187 },
188 .nxdom_flowadv_max = {
189 .nb_def = 0,
190 .nb_min = 0,
191 .nb_max = NX_FLOWADV_MAX,
192 },
193 .nxdom_nexusadv_size = {
194 .nb_def = 0,
195 .nb_min = 0,
196 .nb_max = NX_NEXUSADV_MAX_SZ,
197 },
198 .nxdom_capabilities = {
199 .nb_def = NXPCAP_USER_CHANNEL,
200 .nb_min = NXPCAP_USER_CHANNEL,
201 .nb_max = NXPCAP_USER_CHANNEL,
202 },
203 .nxdom_qmap = {
204 .nb_def = NEXUS_QMAP_TYPE_INVALID,
205 .nb_min = NEXUS_QMAP_TYPE_INVALID,
206 .nb_max = NEXUS_QMAP_TYPE_INVALID,
207 },
208 .nxdom_max_frags = {
209 .nb_def = NX_PBUF_FRAGS_DEFAULT,
210 .nb_min = NX_PBUF_FRAGS_MIN,
211 .nb_max = NX_PBUF_FRAGS_DEFAULT,
212 },
213 .nxdom_init = nx_upipe_dom_init,
214 .nxdom_terminate = nx_upipe_dom_terminate,
215 .nxdom_fini = nx_upipe_dom_fini,
216 .nxdom_find_port = NULL,
217 .nxdom_port_is_reserved = NULL,
218 .nxdom_bind_port = nx_upipe_dom_bind_port,
219 .nxdom_unbind_port = nx_upipe_dom_unbind_port,
220 .nxdom_connect = nx_upipe_dom_connect,
221 .nxdom_disconnect = nx_upipe_dom_disconnect,
222 .nxdom_defunct = nx_upipe_dom_defunct,
223 .nxdom_defunct_finalize = nx_upipe_dom_defunct_finalize,
224 };
225
226 static struct kern_nexus_domain_provider nx_upipe_prov_s = {
227 .nxdom_prov_name = NEXUS_PROVIDER_USER_PIPE,
228 .nxdom_prov_flags = NXDOMPROVF_DEFAULT,
229 .nxdom_prov_cb = {
230 .dp_cb_init = nx_upipe_prov_init,
231 .dp_cb_fini = nx_upipe_prov_fini,
232 .dp_cb_params = nx_upipe_prov_params,
233 .dp_cb_mem_new = nx_upipe_prov_mem_new,
234 .dp_cb_config = NULL,
235 .dp_cb_nx_ctor = nx_upipe_prov_nx_ctor,
236 .dp_cb_nx_dtor = nx_upipe_prov_nx_dtor,
237 .dp_cb_nx_mem_info = NULL,
238 .dp_cb_nx_mib_get = NULL,
239 .dp_cb_nx_stop = NULL,
240 },
241 };
242
243 static ZONE_DEFINE(na_upipe_zone, SKMEM_ZONE_PREFIX ".na.upipe",
244 sizeof(struct nexus_upipe_adapter), ZC_ZFREE_CLEARMEM);
245
246 static ZONE_DEFINE(nx_upipe_zone, SKMEM_ZONE_PREFIX ".nx.upipe",
247 sizeof(struct nx_upipe), ZC_ZFREE_CLEARMEM);
248
249 #define SKMEM_TAG_PIPES "com.apple.skywalk.pipes"
250 static SKMEM_TAG_DEFINE(skmem_tag_pipes, SKMEM_TAG_PIPES);
251
252 static void
nx_upipe_dom_init(struct nxdom * nxdom)253 nx_upipe_dom_init(struct nxdom *nxdom)
254 {
255 SK_LOCK_ASSERT_HELD();
256 ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));
257
258 (void) nxdom_prov_add(nxdom, &nx_upipe_prov_s);
259 }
260
261 static void
nx_upipe_dom_terminate(struct nxdom * nxdom)262 nx_upipe_dom_terminate(struct nxdom *nxdom)
263 {
264 struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;
265
266 STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
267 nxdom_prov_link, tnxdp) {
268 (void) nxdom_prov_del(nxdom_prov);
269 }
270 }
271
/*
 * Domain fini callback: intentionally empty, the user pipe domain has
 * nothing to release here.
 */
static void
nx_upipe_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
}
277
278 static int
nx_upipe_prov_init(struct kern_nexus_domain_provider * nxdom_prov)279 nx_upipe_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
280 {
281 #pragma unused(nxdom_prov)
282 SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
283 return 0;
284 }
285
286 static int
nx_upipe_prov_params_adjust(const struct kern_nexus_domain_provider * nxdom_prov,const struct nxprov_params * nxp,struct nxprov_adjusted_params * adj)287 nx_upipe_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
288 const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
289 {
290 #pragma unused(nxdom_prov, nxp)
291 /*
292 * User pipe requires double the amount of rings.
293 * The ring counts must also be symmetrical.
294 */
295 if (*(adj->adj_tx_rings) != *(adj->adj_rx_rings)) {
296 SK_ERR("rings: tx (%u) != rx (%u)", *(adj->adj_tx_rings),
297 *(adj->adj_rx_rings));
298 return EINVAL;
299 }
300
301 *(adj->adj_tx_rings) *= 2;
302 *(adj->adj_rx_rings) *= 2;
303
304 if (adj->adj_buf_srp->srp_r_seg_size == 0) {
305 adj->adj_buf_srp->srp_r_seg_size = skmem_usr_buf_seg_size;
306 }
307
308 /* enable magazines layer for metadata */
309 *(adj->adj_md_magazines) = TRUE;
310
311 return 0;
312 }
313
314 static int
nx_upipe_prov_params(struct kern_nexus_domain_provider * nxdom_prov,const uint32_t req,const struct nxprov_params * nxp0,struct nxprov_params * nxp,struct skmem_region_params srp[SKMEM_REGIONS])315 nx_upipe_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
316 const uint32_t req, const struct nxprov_params *nxp0,
317 struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS])
318 {
319 struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
320 int err;
321
322 err = nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
323 nxdom, nxdom, nxdom, nx_upipe_prov_params_adjust);
324 #if (DEVELOPMENT || DEBUG)
325 /* sysctl override */
326 if ((err == 0) && (nx_upipe_mhints != 0)) {
327 nxp->nxp_mhints = nx_upipe_mhints;
328 }
329 #endif /* (DEVELOPMENT || DEBUG) */
330 return err;
331 }
332
333 static int
nx_upipe_prov_mem_new(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct nexus_adapter * na)334 nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
335 struct kern_nexus *nx, struct nexus_adapter *na)
336 {
337 #pragma unused(nxdom_prov)
338 int err = 0;
339
340 SK_DF(SK_VERB_USER_PIPE,
341 "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
342 NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
343 SK_KVA(na));
344
345 ASSERT(na->na_arena == NULL);
346 ASSERT(NX_USER_CHANNEL_PROV(nx));
347 /*
348 * The underlying nexus adapters already share the same memory
349 * allocator, and thus we don't care about storing the pp in
350 * the nexus.
351 *
352 * This means that clients calling kern_nexus_get_pbufpool()
353 * will get NULL, but this is fine since we don't expose the
354 * user pipe to external kernel clients.
355 */
356 na->na_arena = skmem_arena_create_for_nexus(na,
357 NX_PROV(nx)->nxprov_region_params, NULL, NULL, FALSE,
358 FALSE, NULL, &err);
359 ASSERT(na->na_arena != NULL || err != 0);
360
361 return err;
362 }
363
364 static void
nx_upipe_prov_fini(struct kern_nexus_domain_provider * nxdom_prov)365 nx_upipe_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
366 {
367 #pragma unused(nxdom_prov)
368 SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
369 }
370
371 static int
nx_upipe_prov_nx_ctor(struct kern_nexus * nx)372 nx_upipe_prov_nx_ctor(struct kern_nexus *nx)
373 {
374 SK_LOCK_ASSERT_HELD();
375 ASSERT(nx->nx_arg == NULL);
376
377 SK_D("nexus 0x%llx (%s)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name);
378
379 nx->nx_arg = nx_upipe_alloc(Z_WAITOK);
380 SK_D("create new upipe 0x%llx for nexus 0x%llx",
381 SK_KVA(NX_UPIPE_PRIVATE(nx)), SK_KVA(nx));
382
383 return 0;
384 }
385
386 static void
nx_upipe_prov_nx_dtor(struct kern_nexus * nx)387 nx_upipe_prov_nx_dtor(struct kern_nexus *nx)
388 {
389 struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
390
391 SK_LOCK_ASSERT_HELD();
392
393 SK_D("nexus 0x%llx (%s) upipe 0x%llx", SK_KVA(nx),
394 NX_DOM_PROV(nx)->nxdom_prov_name, SK_KVA(u));
395
396 if (u->nup_cli_nxb != NULL) {
397 nxb_free(u->nup_cli_nxb);
398 u->nup_cli_nxb = NULL;
399 }
400 if (u->nup_srv_nxb != NULL) {
401 nxb_free(u->nup_srv_nxb);
402 u->nup_srv_nxb = NULL;
403 }
404
405 SK_DF(SK_VERB_USER_PIPE, "marking upipe 0x%llx as free", SK_KVA(u));
406 nx_upipe_free(u);
407 nx->nx_arg = NULL;
408 }
409
410 static struct nexus_upipe_adapter *
na_upipe_alloc(zalloc_flags_t how)411 na_upipe_alloc(zalloc_flags_t how)
412 {
413 struct nexus_upipe_adapter *pna;
414
415 _CASSERT(offsetof(struct nexus_upipe_adapter, pna_up) == 0);
416
417 pna = zalloc_flags(na_upipe_zone, how | Z_ZERO);
418 if (pna) {
419 pna->pna_up.na_type = NA_USER_PIPE;
420 pna->pna_up.na_free = na_upipe_free;
421 }
422 return pna;
423 }
424
425 static void
na_upipe_free(struct nexus_adapter * na)426 na_upipe_free(struct nexus_adapter *na)
427 {
428 struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
429
430 ASSERT(pna->pna_up.na_refcount == 0);
431 SK_DF(SK_VERB_MEM, "pna 0x%llx FREE", SK_KVA(pna));
432 bzero(pna, sizeof(*pna));
433 zfree(na_upipe_zone, pna);
434 }
435
436 static int
nx_upipe_dom_bind_port(struct kern_nexus * nx,nexus_port_t * nx_port,struct nxbind * nxb0,void * info)437 nx_upipe_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
438 struct nxbind *nxb0, void *info)
439 {
440 #pragma unused(info)
441 struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
442 struct nxbind *nxb = NULL;
443 int error = 0;
444
445 ASSERT(nx_port != NULL);
446 ASSERT(nxb0 != NULL);
447
448 switch (*nx_port) {
449 case NEXUS_PORT_USER_PIPE_CLIENT:
450 case NEXUS_PORT_USER_PIPE_SERVER:
451 if ((*nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
452 u->nup_cli_nxb != NULL) ||
453 (*nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
454 u->nup_srv_nxb != NULL)) {
455 error = EEXIST;
456 break;
457 }
458
459 nxb = nxb_alloc(Z_WAITOK);
460 nxb_move(nxb0, nxb);
461 if (*nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
462 u->nup_cli_nxb = nxb;
463 } else {
464 u->nup_srv_nxb = nxb;
465 }
466
467 ASSERT(error == 0);
468 break;
469
470 default:
471 error = EDOM;
472 break;
473 }
474
475 return error;
476 }
477
478 static int
nx_upipe_dom_unbind_port(struct kern_nexus * nx,nexus_port_t nx_port)479 nx_upipe_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
480 {
481 struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
482 struct nxbind *nxb = NULL;
483 int error = 0;
484
485 ASSERT(nx_port != NEXUS_PORT_ANY);
486
487 switch (nx_port) {
488 case NEXUS_PORT_USER_PIPE_CLIENT:
489 case NEXUS_PORT_USER_PIPE_SERVER:
490 if ((nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
491 u->nup_cli_nxb == NULL) ||
492 (nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
493 u->nup_srv_nxb == NULL)) {
494 error = ENOENT;
495 break;
496 }
497
498 if (nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
499 nxb = u->nup_cli_nxb;
500 u->nup_cli_nxb = NULL;
501 } else {
502 nxb = u->nup_srv_nxb;
503 u->nup_srv_nxb = NULL;
504 }
505 nxb_free(nxb);
506 ASSERT(error == 0);
507 break;
508
509 default:
510 error = EDOM;
511 break;
512 }
513
514 return error;
515 }
516
517 static int
nx_upipe_dom_connect(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct kern_channel * ch,struct chreq * chr,struct kern_channel * ch0,struct nxbind * nxb,struct proc * p)518 nx_upipe_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
519 struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
520 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
521 {
522 #pragma unused(nxdom_prov)
523 nexus_port_t port = chr->cr_port;
524 int err = 0;
525
526 SK_LOCK_ASSERT_HELD();
527
528 ASSERT(NX_DOM_PROV(nx) == nxdom_prov);
529 ASSERT(nx->nx_prov->nxprov_params->nxp_type ==
530 nxdom_prov->nxdom_prov_dom->nxdom_type &&
531 nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_USER_PIPE);
532
533 /*
534 * XXX: channel in user packet pool mode is not supported for
535 * user-pipe for now.
536 */
537 if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
538 SK_ERR("User packet pool mode not supported for upipe");
539 err = ENOTSUP;
540 goto done;
541 }
542
543 if (chr->cr_mode & CHMODE_EVENT_RING) {
544 SK_ERR("event ring is not supported for upipe");
545 err = ENOTSUP;
546 goto done;
547 }
548
549 if (chr->cr_mode & CHMODE_LOW_LATENCY) {
550 SK_ERR("low latency is not supported for upipe");
551 err = ENOTSUP;
552 goto done;
553 }
554
555 if (port == NEXUS_PORT_USER_PIPE_SERVER) {
556 chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_MASTER;
557 } else if (port == NEXUS_PORT_USER_PIPE_CLIENT) {
558 chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_SLAVE;
559 } else {
560 err = EINVAL;
561 goto done;
562 }
563
564 chr->cr_endpoint = chr->cr_real_endpoint;
565 chr->cr_ring_set = RING_SET_DEFAULT;
566 chr->cr_pipe_id = 0;
567 (void) snprintf(chr->cr_name, sizeof(chr->cr_name), "upipe:%llu:%.*s",
568 nx->nx_id, (int)nx->nx_prov->nxprov_params->nxp_namelen,
569 nx->nx_prov->nxprov_params->nxp_name);
570
571 err = na_connect(nx, ch, chr, ch0, nxb, p);
572 done:
573 return err;
574 }
575
576 static void
nx_upipe_dom_disconnect(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct kern_channel * ch)577 nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
578 struct kern_nexus *nx, struct kern_channel *ch)
579 {
580 #pragma unused(nxdom_prov)
581 SK_LOCK_ASSERT_HELD();
582
583 SK_D("channel 0x%llx -!- nexus 0x%llx (%s:\"%s\":%u:%d)", SK_KVA(ch),
584 SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
585 ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);
586
587 na_disconnect(nx, ch);
588 /*
589 * Set NXF_REJECT on the nexus which would cause any channel on the
590 * peer adapter to cease to function.
591 */
592 if (NX_PROV(nx)->nxprov_params->nxp_reject_on_close) {
593 atomic_bitset_32(&nx->nx_flags, NXF_REJECT);
594 }
595 }
596
597 static void
nx_upipe_dom_defunct(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct kern_channel * ch,struct proc * p)598 nx_upipe_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
599 struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
600 {
601 #pragma unused(nxdom_prov, nx)
602 struct nexus_adapter *na = ch->ch_na;
603 struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
604 ring_id_t qfirst = ch->ch_first[NR_TX];
605 ring_id_t qlast = ch->ch_last[NR_TX];
606 uint32_t i;
607
608 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
609 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
610 ASSERT(na->na_type == NA_USER_PIPE);
611
612 /*
613 * Inform the peer receiver thread in nx_upipe_na_rxsync() or the
614 * peer transmit thread in nx_upipe_na_txsync() about
615 * this endpoint going defunct. We utilize the TX ring's
616 * lock for serialization, since that is what's being used
617 * by the receiving endpoint.
618 */
619 for (i = qfirst; i < qlast; i++) {
620 /*
621 * For maintaining lock ordering between the two channels of
622 * user pipe.
623 */
624 if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
625 (void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
626 (void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
627 } else {
628 (void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
629 (void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
630 }
631 }
632
633 na_ch_rings_defunct(ch, p);
634
635 for (i = qfirst; i < qlast; i++) {
636 if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
637 (void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
638 (void) kr_exit(&NAKR(na, NR_TX)[i]);
639 } else {
640 (void) kr_exit(&NAKR(na, NR_TX)[i]);
641 (void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
642 }
643 }
644 }
645
646 static void
nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct kern_channel * ch,boolean_t locked)647 nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
648 struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
649 {
650 #pragma unused(nxdom_prov)
651 struct nexus_upipe_adapter *pna =
652 (struct nexus_upipe_adapter *)ch->ch_na;
653
654 if (!locked) {
655 SK_LOCK_ASSERT_NOTHELD();
656 SK_LOCK();
657 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
658 } else {
659 SK_LOCK_ASSERT_HELD();
660 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
661 }
662
663 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
664 ASSERT(ch->ch_na->na_type == NA_USER_PIPE);
665
666 /*
667 * At this point, we know that the arena shared by the master and
668 * slave adapters has no more valid mappings on the channels opened
669 * to them. We need to invoke na_defunct() on both adapters to
670 * release any remaining slots attached to their rings.
671 *
672 * Note that the 'ch' that we pass in here is irrelevant as we
673 * don't support user packet pool for user pipe.
674 */
675 na_defunct(nx, ch, &pna->pna_up, locked);
676 if (pna->pna_peer != NULL) {
677 na_defunct(nx, ch, &pna->pna_peer->pna_up, locked);
678 }
679
680 /*
681 * And if their parent adapter (the memory owner) is a pseudo
682 * nexus adapter that we initially created in nx_upipe_na_find(),
683 * invoke na_defunct() on it now to do the final teardown on
684 * the arena.
685 */
686 if (pna->pna_parent->na_type == NA_PSEUDO) {
687 na_defunct(nx, ch, pna->pna_parent, locked);
688 }
689
690 SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d)",
691 ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx),
692 nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
693 ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);
694
695 if (!locked) {
696 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
697 SK_UNLOCK();
698 } else {
699 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
700 SK_LOCK_ASSERT_HELD();
701 }
702 }
703
704 /* allocate the pipe array in the parent adapter */
705 static int
nx_upipe_na_alloc(struct nexus_adapter * na,uint32_t npipes)706 nx_upipe_na_alloc(struct nexus_adapter *na, uint32_t npipes)
707 {
708 struct nexus_upipe_adapter **npa;
709
710 if (npipes <= na->na_max_pipes) {
711 /* we already have more entries that requested */
712 return 0;
713 }
714 if (npipes < na->na_next_pipe || npipes > NX_UPIPE_MAXPIPES) {
715 return EINVAL;
716 }
717
718 npa = sk_realloc_type_array(struct nexus_upipe_adapter *,
719 na->na_max_pipes, npipes, na->na_pipes, Z_WAITOK, skmem_tag_pipes);
720 if (npa == NULL) {
721 return ENOMEM;
722 }
723
724 na->na_pipes = npa;
725 na->na_max_pipes = npipes;
726
727 return 0;
728 }
729
730 /* deallocate the parent array in the parent adapter */
731 void
nx_upipe_na_dealloc(struct nexus_adapter * na)732 nx_upipe_na_dealloc(struct nexus_adapter *na)
733 {
734 if (na->na_pipes) {
735 if (na->na_next_pipe > 0) {
736 SK_ERR("freeing not empty pipe array for %s "
737 "(%u dangling pipes)!", na->na_name,
738 na->na_next_pipe);
739 }
740 sk_free_type_array(struct nexus_upipe_adapter *,
741 na->na_max_pipes, na->na_pipes);
742 na->na_pipes = NULL;
743 na->na_max_pipes = 0;
744 na->na_next_pipe = 0;
745 }
746 }
747
748 /* find a pipe endpoint with the given id among the parent's pipes */
749 static struct nexus_upipe_adapter *
nx_upipe_find(struct nexus_adapter * parent,uint32_t pipe_id)750 nx_upipe_find(struct nexus_adapter *parent, uint32_t pipe_id)
751 {
752 uint32_t i;
753 struct nexus_upipe_adapter *na;
754
755 for (i = 0; i < parent->na_next_pipe; i++) {
756 na = parent->na_pipes[i];
757 if (na->pna_id == pipe_id) {
758 return na;
759 }
760 }
761 return NULL;
762 }
763
764 /* add a new pipe endpoint to the parent array */
765 static int
nx_upipe_na_add(struct nexus_adapter * parent,struct nexus_upipe_adapter * na)766 nx_upipe_na_add(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
767 {
768 if (parent->na_next_pipe >= parent->na_max_pipes) {
769 uint32_t npipes = parent->na_max_pipes ?
770 2 * parent->na_max_pipes : 2;
771 int error = nx_upipe_na_alloc(parent, npipes);
772 if (error) {
773 return error;
774 }
775 }
776
777 parent->na_pipes[parent->na_next_pipe] = na;
778 na->pna_parent_slot = parent->na_next_pipe;
779 parent->na_next_pipe++;
780 return 0;
781 }
782
783 /* remove the given pipe endpoint from the parent array */
784 static void
nx_upipe_na_remove(struct nexus_adapter * parent,struct nexus_upipe_adapter * na)785 nx_upipe_na_remove(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
786 {
787 uint32_t n;
788 n = --parent->na_next_pipe;
789 if (n != na->pna_parent_slot) {
790 struct nexus_upipe_adapter **p =
791 &parent->na_pipes[na->pna_parent_slot];
792 *p = parent->na_pipes[n];
793 (*p)->pna_parent_slot = na->pna_parent_slot;
794 }
795 parent->na_pipes[n] = NULL;
796 }
797
798 static int
nx_upipe_na_txsync(struct __kern_channel_ring * txkring,struct proc * p,uint32_t flags)799 nx_upipe_na_txsync(struct __kern_channel_ring *txkring, struct proc *p,
800 uint32_t flags)
801 {
802 struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
803 volatile uint64_t *tx_tsync, *tx_tnote, *rx_tsync;
804 int sent = 0, ret = 0;
805
806 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
807 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
808 "flags 0x%x -> kr \"%s\" (0x%llx) krflags 0x%b ring %u",
809 sk_proc_name_address(p), sk_proc_pid(p), txkring->ckr_name,
810 SK_KVA(txkring), txkring->ckr_flags, CKRF_BITS,
811 txkring->ckr_ring_id, flags, rxkring->ckr_name, SK_KVA(rxkring),
812 rxkring->ckr_flags, CKRF_BITS, rxkring->ckr_ring_id);
813
814 /*
815 * Serialize write access to the transmit ring, since another
816 * thread coming down for rxsync might pick up pending slots.
817 */
818 ASSERT(txkring->ckr_owner == current_thread());
819
820 /*
821 * Record the time of sync and grab sync time of other side;
822 * use atomic store and load since we're not holding the
823 * lock used by the receive ring. This allows us to avoid
824 * the potentially costly membar_sync().
825 */
826 /* deconst */
827 tx_tsync = __DECONST(uint64_t *, &txkring->ckr_ring->ring_sync_time);
828 atomic_set_64(tx_tsync, txkring->ckr_sync_time);
829
830 /*
831 * Read from the peer's kring, not its user ring; the peer's channel
832 * may be defunct, in which case it's unsafe to access its user ring.
833 */
834 rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_sync_time);
835 tx_tnote = __DECONST(uint64_t *, &txkring->ckr_ring->ring_notify_time);
836 *tx_tnote = atomic_add_64_ov(rx_tsync, 0);
837
838 if (__probable(txkring->ckr_rhead != txkring->ckr_khead)) {
839 sent = nx_upipe_na_txsync_locked(txkring, p, flags,
840 &ret, FALSE);
841 }
842
843 if (sent != 0) {
844 (void) rxkring->ckr_na_notify(rxkring, p, 0);
845 }
846
847 return ret;
848 }
849
850 int
nx_upipe_na_txsync_locked(struct __kern_channel_ring * txkring,struct proc * p,uint32_t flags,int * ret,boolean_t rx)851 nx_upipe_na_txsync_locked(struct __kern_channel_ring *txkring, struct proc *p,
852 uint32_t flags, int *ret, boolean_t rx)
853 {
854 #pragma unused(p, flags, rx)
855 struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
856 const slot_idx_t lim_tx = txkring->ckr_lim;
857 const slot_idx_t lim_rx = rxkring->ckr_lim;
858 slot_idx_t j, k;
859 int n, m, b, sent = 0;
860 uint32_t byte_count = 0;
861 int limit; /* max # of slots to transfer */
862
863 *ret = 0;
864
865 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
866 "%s(%d) kr \"%s\", kh %3u kt %3u | "
867 "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
868 sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
869 txkring->ckr_ktail, txkring->ckr_rhead,
870 txkring->ckr_rtail, rx ? "*" : "");
871 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
872 "%s(%d) kr \"%s\", kh %3u kt %3u | "
873 "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
874 sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
875 rxkring->ckr_ktail, rxkring->ckr_rhead,
876 rxkring->ckr_rtail, rx ? "*" : "");
877
878 if (__improbable(KR_DROP(txkring) || KR_DROP(rxkring))) {
879 *ret = ENXIO;
880 goto done;
881 }
882
883 j = rxkring->ckr_ktail; /* RX */
884 k = txkring->ckr_khead; /* TX */
885
886 /* # of new tx slots */
887 n = txkring->ckr_rhead - txkring->ckr_khead;
888 if (n < 0) {
889 n += txkring->ckr_num_slots;
890 }
891 limit = n;
892
893 /* # of rx busy (unclaimed) slots */
894 b = j - rxkring->ckr_khead;
895 if (b < 0) {
896 b += rxkring->ckr_num_slots;
897 }
898
899 /* # of rx avail free slots (subtract busy from max) */
900 m = lim_rx - b;
901 if (m < limit) {
902 limit = m;
903 }
904
905 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
906 "%s(%d) kr \"%s\" -> new %u, kr \"%s\" "
907 "-> free %u", sk_proc_name_address(p), sk_proc_pid(p),
908 txkring->ckr_name, n, rxkring->ckr_name, m);
909
910 /* rxring is full, or nothing to send? */
911 if (__improbable((sent = limit) == 0)) {
912 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
913 "%s(%d) kr \"%s\" -> %s%s",
914 sk_proc_name_address(p), sk_proc_pid(p), (n > m) ?
915 rxkring->ckr_name : txkring->ckr_name, ((n > m) ?
916 "no room avail" : "no new slots"),
917 (rx ? " (lost race, ok)" : ""));
918 goto done;
919 }
920
921 ASSERT(limit > 0);
922 while (limit--) {
923 struct __kern_slot_desc *ksd_tx = KR_KSD(txkring, k);
924 struct __user_slot_desc *usd_tx = KR_USD(txkring, k);
925 struct __kern_slot_desc *ksd_rx = KR_KSD(rxkring, j);
926 struct __user_slot_desc *usd_rx = KR_USD(rxkring, j);
927 struct __kern_quantum *kqum;
928
929 kqum = ksd_tx->sd_qum;
930 /*
931 * Packets failing internalization should be dropped in
932 * TX sync prologue.
933 */
934 ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED |
935 QUM_F_FINALIZED)) == (QUM_F_INTERNALIZED |
936 QUM_F_FINALIZED));
937
938 byte_count += kqum->qum_len;
939
940 /*
941 * Swap the slots.
942 *
943 * XXX: [email protected] -- this bypasses the slot attach/detach
944 * interface, and needs to be changed when upipe adopts the
945 * packet APIs. SD_SWAP() will perform a block copy of the
946 * swap, and will readjust the kernel slot descriptor's sd_user
947 * accordingly.
948 */
949 SD_SWAP(ksd_rx, usd_rx, ksd_tx, usd_tx);
950
951 j = SLOT_NEXT(j, lim_rx);
952 k = SLOT_NEXT(k, lim_tx);
953 }
954
955 kr_update_stats(rxkring, sent, byte_count);
956 if (__improbable(kr_stat_enable != 0)) {
957 txkring->ckr_stats = rxkring->ckr_stats;
958 }
959
960 /*
961 * Make sure the slots are updated before ckr_ktail reach global
962 * visibility, since we are not holding rx ring's kr_enter().
963 */
964 membar_sync();
965
966 rxkring->ckr_ktail = j;
967 txkring->ckr_khead = k;
968 txkring->ckr_ktail = SLOT_PREV(k, lim_tx);
969
970 done:
971 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
972 "%s(%d) kr \"%s\", kh %3u kt %3u | "
973 "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
974 sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
975 txkring->ckr_ktail, txkring->ckr_rhead,
976 txkring->ckr_rtail, rx ? "*" : "");
977 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
978 "%s(%d) kr \"%s\", kh %3u kt %3u | "
979 "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
980 sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
981 rxkring->ckr_ktail, rxkring->ckr_rhead,
982 rxkring->ckr_rtail, rx ? "*" : "");
983
984 return sent;
985 }
986
987 static int
nx_upipe_na_rxsync(struct __kern_channel_ring * rxkring,struct proc * p,uint32_t flags)988 nx_upipe_na_rxsync(struct __kern_channel_ring *rxkring, struct proc *p,
989 uint32_t flags)
990 {
991 #pragma unused(p)
992 struct __kern_channel_ring *txkring = rxkring->ckr_pipe;
993 volatile uint64_t *rx_tsync, *rx_tnote, *tx_tsync;
994 const slot_idx_t lim_rx = rxkring->ckr_lim;
995 int n; /* new slots from transmit side */
996 int m, b, ret = 0;
997 uint32_t r;
998
999 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
1000 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
1001 "flags 0x%x <- kr \"%s\" (0x%llx) krflags 0x%b ring %u",
1002 sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
1003 SK_KVA(rxkring), rxkring->ckr_flags, CKRF_BITS,
1004 rxkring->ckr_ring_id, flags, txkring->ckr_name, SK_KVA(txkring),
1005 txkring->ckr_flags, CKRF_BITS, txkring->ckr_ring_id);
1006
1007 ASSERT(rxkring->ckr_owner == current_thread());
1008
1009 /* reclaim and get # of rx reclaimed slots */
1010 r = kr_reclaim(rxkring);
1011
1012 /* # of rx busy (unclaimed) slots */
1013 b = rxkring->ckr_ktail - rxkring->ckr_khead;
1014 if (b < 0) {
1015 b += rxkring->ckr_num_slots;
1016 }
1017
1018 /* # of rx avail free slots (subtract busy from max) */
1019 m = lim_rx - b;
1020
1021 /*
1022 * Check if there's any new slots on transmit ring; do this
1023 * first without acquiring that ring's ckr_qlock, and use
1024 * the memory barrier (paired with second one in txsync.)
1025 * If we missed the race we'd just pay the cost of acquiring
1026 * ckr_qlock and potentially returning from "internal txsync"
1027 * without anything to process, which is okay.
1028 */
1029 membar_sync();
1030 n = txkring->ckr_rhead - txkring->ckr_khead;
1031 if (n < 0) {
1032 n += txkring->ckr_num_slots;
1033 }
1034
1035 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
1036 "%s(%d) kr \"%s\" <- free %u, kr \"%s\" <- new %u",
1037 sk_proc_name_address(p), sk_proc_pid(p),
1038 rxkring->ckr_name, m, txkring->ckr_name, n);
1039
1040 /*
1041 * Record the time of sync and grab sync time of other side;
1042 * use atomic store and load since we're not holding the
1043 * lock used by the receive ring. This allows us to avoid
1044 * the potentially costly membar_sync().
1045 */
1046 /* deconst */
1047 rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_sync_time);
1048 atomic_set_64(rx_tsync, rxkring->ckr_sync_time);
1049
1050 /*
1051 * Read from the peer's kring, not its user ring; the peer's channel
1052 * may be defunct, in which case it's unsafe to access its user ring.
1053 */
1054 tx_tsync = __DECONST(uint64_t *, &txkring->ckr_sync_time);
1055 rx_tnote = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_notify_time);
1056 *rx_tnote = atomic_add_64_ov(tx_tsync, 0);
1057
1058 /*
1059 * If we have slots to pick up from the transmit side and and we
1060 * have space available, perform an equivalent of "internal txsync".
1061 *
1062 * Acquire write access to the transmit (peer) ring,
1063 * Serialize write access to it, since another thread
1064 * coming down for txsync might add new slots.
1065 * If we fail to get the kring lock, then don't worry because
1066 * there's already a transmit sync in progress to move packets.
1067 */
1068 if (__probable(n != 0 && m != 0 && (flags & NA_SYNCF_MONITOR) == 0)) {
1069 (void) kr_enter(txkring, TRUE);
1070 n = nx_upipe_na_txsync_locked(txkring, p, flags, &ret, TRUE);
1071 kr_exit(txkring);
1072 } else {
1073 n = 0;
1074 }
1075
1076 /*
1077 * If we have reclaimed some slots or transferred new slots
1078 * from the transmit side, notify the other end. Also notify
1079 * ourselves to pick up newly transferred ones, if any.
1080 */
1081 if (__probable(r != 0 || n != 0)) {
1082 SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
1083 "%s(%d) kr \"%s\", kh %3u kt %3u | "
1084 "rh %3u rt %3u [rel %u new %u]",
1085 sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
1086 rxkring->ckr_khead, rxkring->ckr_ktail,
1087 rxkring->ckr_rhead, rxkring->ckr_rtail, r, n);
1088
1089 (void) txkring->ckr_na_notify(txkring, p, 0);
1090 }
1091
1092 return ret;
1093 }
1094
1095 static int
nx_upipe_na_rings_create(struct nexus_adapter * na,struct kern_channel * ch)1096 nx_upipe_na_rings_create(struct nexus_adapter *na, struct kern_channel *ch)
1097 {
1098 struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1099 struct nexus_adapter *ona = &pna->pna_peer->pna_up;
1100 int error = 0;
1101 enum txrx t;
1102 uint32_t i;
1103
1104 /*
1105 * Create krings and all the rings for this end;
1106 * we'll update ckr_save_ring pointers below.
1107 */
1108 error = na_rings_mem_setup(na, 0, FALSE, ch);
1109 if (error != 0) {
1110 goto err;
1111 }
1112
1113 /* update our hidden ring pointers */
1114 for_rx_tx(t) {
1115 for (i = 0; i < na_get_nrings(na, t); i++) {
1116 NAKR(na, t)[i].ckr_save_ring =
1117 NAKR(na, t)[i].ckr_ring;
1118 }
1119 }
1120
1121 /* now, create krings and rings of the other end */
1122 error = na_rings_mem_setup(ona, 0, FALSE, ch);
1123 if (error != 0) {
1124 na_rings_mem_teardown(na, ch, FALSE); /* this end */
1125 goto err;
1126 }
1127
1128 for_rx_tx(t) {
1129 for (i = 0; i < na_get_nrings(ona, t); i++) {
1130 NAKR(ona, t)[i].ckr_save_ring =
1131 NAKR(ona, t)[i].ckr_ring;
1132 }
1133 }
1134
1135 /* cross link the krings */
1136 for_rx_tx(t) {
1137 /* swap NR_TX <-> NR_RX (skip host ring) */
1138 enum txrx r = sk_txrx_swap(t);
1139 for (i = 0; i < na_get_nrings(na, t); i++) {
1140 NAKR(na, t)[i].ckr_pipe =
1141 NAKR(&pna->pna_peer->pna_up, r) + i;
1142 NAKR(&pna->pna_peer->pna_up, r)[i].ckr_pipe =
1143 NAKR(na, t) + i;
1144 }
1145 }
1146 err:
1147 return error;
1148 }
1149
1150 /*
1151 * Pipe endpoints are created and destroyed together, so that endpoints do not
1152 * have to check for the existence of their peer at each ?xsync.
1153 *
1154 * To play well with the existing nexus adapter infrastructure (refcounts etc.),
1155 * we adopt the following strategy:
1156 *
1157 * 1) The first endpoint that is created also creates the other endpoint and
1158 * grabs a reference to it.
1159 *
1160 * state A) user1 --> endpoint1 --> endpoint2
1161 *
1162 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
1163 * its reference to the user:
1164 *
1165 * state B) user1 --> endpoint1 endpoint2 <--- user2
1166 *
1167 * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
1168 * callback endpoint2 notes that endpoint1 is still active and adds a reference
1169 * from endpoint1 to itself. When user2 then releases her own reference,
1170 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
1171 * would be reached if endpoint1 were released instead.
1172 *
1173 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
1174 * it owns a reference to endpoint2 and releases it.
1175 *
1176 * Something similar goes on for the creation and destruction of the krings.
1177 */
1178
1179
1180 /*
1181 * nx_upipe_na_krings_create.
1182 *
1183 * There are two cases:
1184 *
1185 * 1) state is
1186 *
1187 * usr1 --> e1 --> e2
1188 *
1189 * and we are e1. We have to create both sets
1190 * of krings.
1191 *
1192 * 2) state is
1193 *
1194 * usr1 --> e1 --> e2
1195 *
1196 * and we are e2. e1 is certainly registered and our
1197 * krings already exist, but they may be hidden.
1198 */
1199 static int
nx_upipe_na_krings_create(struct nexus_adapter * na,struct kern_channel * ch)1200 nx_upipe_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
1201 {
1202 struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1203 int error = 0;
1204 enum txrx t;
1205 uint32_t i;
1206
1207 /*
1208 * Verify symmetrical ring counts; validated
1209 * at nexus provider registration time.
1210 */
1211 ASSERT(na_get_nrings(na, NR_TX) == na_get_nrings(na, NR_RX));
1212
1213 if (pna->pna_peer_ref) {
1214 /* case 1) above */
1215 SK_DF(SK_VERB_USER_PIPE,
1216 "0x%llx: case 1, create everything", SK_KVA(na));
1217 error = nx_upipe_na_rings_create(na, ch);
1218 } else {
1219 /* case 2) above */
1220 /* recover the hidden rings */
1221 SK_DF(SK_VERB_USER_PIPE,
1222 "0x%llx: case 2, hidden rings", SK_KVA(na));
1223 for_rx_tx(t) {
1224 for (i = 0; i < na_get_nrings(na, t); i++) {
1225 NAKR(na, t)[i].ckr_ring =
1226 NAKR(na, t)[i].ckr_save_ring;
1227 }
1228 }
1229 }
1230
1231 ASSERT(error == 0 || (na->na_tx_rings == NULL &&
1232 na->na_rx_rings == NULL && na->na_slot_ctxs == NULL));
1233 ASSERT(error == 0 || (pna->pna_peer->pna_up.na_tx_rings == NULL &&
1234 pna->pna_peer->pna_up.na_rx_rings == NULL &&
1235 pna->pna_peer->pna_up.na_slot_ctxs == NULL));
1236
1237 return error;
1238 }
1239
1240 /*
1241 * nx_upipe_na_activate.
1242 *
1243 * There are two cases on registration (mode == NA_ACTIVATE_MODE_ON)
1244 *
1245 * 1.a) state is
1246 *
1247 * usr1 --> e1 --> e2
1248 *
1249 * and we are e1. Nothing special to do.
1250 *
1251 * 1.b) state is
1252 *
1253 * usr1 --> e1 --> e2 <-- usr2
1254 *
1255 * and we are e2. Drop the ref e1 is holding.
1256 *
1257 * There are two additional cases on unregister (mode == NA_ACTIVATE_MODE_OFF)
1258 *
1259 * 2.a) state is
1260 *
1261 * usr1 --> e1 --> e2
1262 *
1263 * and we are e1. Nothing special to do, e2 will
1264 * be cleaned up by the destructor of e1.
1265 *
1266 * 2.b) state is
1267 *
1268 * usr1 --> e1 e2 <-- usr2
1269 *
1270 * and we are either e1 or e2. Add a ref from the
1271 * other end and hide our rings.
1272 */
1273 static int
nx_upipe_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1274 nx_upipe_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1275 {
1276 struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1277
1278 SK_LOCK_ASSERT_HELD();
1279
1280 SK_DF(SK_VERB_USER_PIPE, "na \"%s\" (0x%llx) %s", na->na_name,
1281 SK_KVA(na), na_activate_mode2str(mode));
1282
1283 switch (mode) {
1284 case NA_ACTIVATE_MODE_ON:
1285 atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
1286 break;
1287
1288 case NA_ACTIVATE_MODE_DEFUNCT:
1289 break;
1290
1291 case NA_ACTIVATE_MODE_OFF:
1292 atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
1293 break;
1294
1295 default:
1296 VERIFY(0);
1297 /* NOTREACHED */
1298 __builtin_unreachable();
1299 }
1300
1301 if (pna->pna_peer_ref) {
1302 SK_DF(SK_VERB_USER_PIPE,
1303 "0x%llx: case 1.a or 2.a, nothing to do", SK_KVA(na));
1304 return 0;
1305 }
1306
1307 switch (mode) {
1308 case NA_ACTIVATE_MODE_ON:
1309 SK_DF(SK_VERB_USER_PIPE,
1310 "0x%llx: case 1.b, drop peer", SK_KVA(na));
1311 if (pna->pna_peer->pna_peer_ref) {
1312 pna->pna_peer->pna_peer_ref = FALSE;
1313 (void) na_release_locked(na);
1314 }
1315 break;
1316
1317 case NA_ACTIVATE_MODE_OFF:
1318 SK_DF(SK_VERB_USER_PIPE,
1319 "0x%llx: case 2.b, grab peer", SK_KVA(na));
1320 if (!pna->pna_peer->pna_peer_ref) {
1321 na_retain_locked(na);
1322 pna->pna_peer->pna_peer_ref = TRUE;
1323 }
1324 break;
1325
1326 default:
1327 break;
1328 }
1329
1330 return 0;
1331 }
1332
1333 /*
1334 * nx_upipe_na_krings_delete.
1335 *
1336 * There are two cases:
1337 *
1338 * 1) state is
1339 *
1340 * usr1 --> e1 --> e2
1341 *
1342 * and we are e1 (e2 is not bound, so krings_delete cannot be
1343 * called on it);
1344 *
1345 * 2) state is
1346 *
1347 * usr1 --> e1 e2 <-- usr2
1348 *
1349 * and we are either e1 or e2.
1350 *
1351 * In the former case we have to also delete the krings of e2;
1352 * in the latter case we do nothing (note that our krings
1353 * have already been hidden in the unregister callback).
1354 */
1355 static void
nx_upipe_na_krings_delete(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)1356 nx_upipe_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
1357 boolean_t defunct)
1358 {
1359 struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1360 struct nexus_adapter *ona; /* na of the other end */
1361 uint32_t i;
1362 enum txrx t;
1363
1364 SK_LOCK_ASSERT_HELD();
1365
1366 if (!pna->pna_peer_ref) {
1367 SK_DF(SK_VERB_USER_PIPE,
1368 "0x%llx: case 2, kept alive by peer", SK_KVA(na));
1369 /*
1370 * If adapter is defunct (note the explicit test against
1371 * NAF_DEFUNCT, and not the "defunct" parameter passed in
1372 * by the caller), then the peer's channel has gone defunct.
1373 * We get here because this channel was not defuncted, and
1374 * that this is the last active reference to the adapter.
1375 * At this point we tear everything down, since the caller
1376 * will proceed to destroying the memory regions.
1377 */
1378 if (na->na_flags & NAF_DEFUNCT) {
1379 na_rings_mem_teardown(na, ch, defunct);
1380 }
1381 return;
1382 }
1383
1384 /* case 1) above */
1385 SK_DF(SK_VERB_USER_PIPE,
1386 "0x%llx: case 1, deleting everyhing", SK_KVA(na));
1387
1388 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
1389
1390 /* restore the ring to be deleted on the peer */
1391 ona = &pna->pna_peer->pna_up;
1392 if (ona->na_tx_rings == NULL) {
1393 /*
1394 * Already deleted, we must be on an
1395 * cleanup-after-error path
1396 * Just delete this end
1397 */
1398 na_rings_mem_teardown(na, ch, defunct);
1399 return;
1400 }
1401
1402 /* delete the memory rings */
1403 na_rings_mem_teardown(na, ch, defunct);
1404
1405 if (!defunct) {
1406 for_rx_tx(t) {
1407 for (i = 0; i < na_get_nrings(ona, t); i++) {
1408 NAKR(ona, t)[i].ckr_ring =
1409 NAKR(ona, t)[i].ckr_save_ring;
1410 }
1411 }
1412 }
1413
1414 /* Delete the memory rings */
1415 na_rings_mem_teardown(ona, ch, defunct);
1416 }
1417
1418 static void
nx_upipe_na_dtor(struct nexus_adapter * na)1419 nx_upipe_na_dtor(struct nexus_adapter *na)
1420 {
1421 struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
1422 struct nx_upipe *u = NX_UPIPE_PRIVATE(na->na_nx);
1423
1424 SK_LOCK_ASSERT_HELD();
1425
1426 SK_DF(SK_VERB_USER_PIPE, "0x%llx", SK_KVA(na));
1427 if (pna->pna_peer_ref) {
1428 SK_DF(SK_VERB_USER_PIPE,
1429 "0x%llx: clean up peer 0x%llx", SK_KVA(na),
1430 SK_KVA(&pna->pna_peer->pna_up));
1431 pna->pna_peer_ref = FALSE;
1432 (void) na_release_locked(&pna->pna_peer->pna_up);
1433 }
1434 if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
1435 nx_upipe_na_remove(pna->pna_parent, pna);
1436 }
1437 (void) na_release_locked(pna->pna_parent);
1438 pna->pna_parent = NULL;
1439
1440 /* release reference to parent adapter held by nx_upipe_na_find() */
1441 ASSERT(u->nup_pna_users != 0);
1442 if (--u->nup_pna_users == 0) {
1443 ASSERT(u->nup_pna != NULL);
1444 SK_DF(SK_VERB_USER_PIPE, "release parent: \"%s\" (0x%llx)",
1445 u->nup_pna->na_name, SK_KVA(u->nup_pna));
1446 na_release_locked(u->nup_pna);
1447 u->nup_pna = NULL;
1448 }
1449 }
1450
1451 int
nx_upipe_na_find(struct kern_nexus * nx,struct kern_channel * ch,struct chreq * chr,struct nxbind * nxb,struct proc * p,struct nexus_adapter ** na,boolean_t create)1452 nx_upipe_na_find(struct kern_nexus *nx, struct kern_channel *ch,
1453 struct chreq *chr, struct nxbind *nxb, struct proc *p,
1454 struct nexus_adapter **na, boolean_t create)
1455 {
1456 #pragma unused(ch, p)
1457 struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
1458 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
1459 struct nexus_adapter *pna = NULL; /* parent adapter */
1460 boolean_t anon = NX_ANONYMOUS_PROV(nx);
1461 struct nexus_upipe_adapter *mna, *sna, *req;
1462 ch_endpoint_t ep = chr->cr_endpoint;
1463 uint32_t pipe_id;
1464 int error;
1465
1466 SK_LOCK_ASSERT_HELD();
1467 *na = NULL;
1468
1469 #if SK_LOG
1470 uuid_string_t uuidstr;
1471 SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
1472 "ring_id %d ring_set %u ep_type %u:%u create %u%s",
1473 chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
1474 (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
1475 chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
1476 chr->cr_real_endpoint, chr->cr_endpoint, create,
1477 (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
1478 ep != CH_ENDPOINT_USER_PIPE_SLAVE) ? " (skipped)" : "");
1479 #endif /* SK_LOG */
1480
1481 if (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
1482 ep != CH_ENDPOINT_USER_PIPE_SLAVE) {
1483 return 0;
1484 }
1485
1486 /*
1487 * Check client credentials.
1488 */
1489 if (chr->cr_port == NEXUS_PORT_USER_PIPE_SERVER) {
1490 if (!anon && (u->nup_srv_nxb == NULL || nxb == NULL ||
1491 !nxb_is_equal(u->nup_srv_nxb, nxb))) {
1492 return EACCES;
1493 }
1494 } else {
1495 ASSERT(chr->cr_port == NEXUS_PORT_USER_PIPE_CLIENT);
1496 if (!anon && (u->nup_cli_nxb == NULL || nxb == NULL ||
1497 !nxb_is_equal(u->nup_cli_nxb, nxb))) {
1498 return EACCES;
1499 }
1500 }
1501
1502 /*
1503 * First, try to find a previously-created parent adapter
1504 * for this nexus; else, create one and store it in the
1505 * nexus. We'll release this at nexus destructor time.
1506 */
1507 if ((pna = u->nup_pna) != NULL) {
1508 na_retain_locked(pna); /* for us */
1509 SK_DF(SK_VERB_USER_PIPE, "found parent: \"%s\" (0x%llx)",
1510 pna->na_name, SK_KVA(pna));
1511 } else {
1512 /* callee will hold a reference for us upon success */
1513 error = na_pseudo_create(nx, chr, &pna);
1514 if (error != 0) {
1515 SK_ERR("parent create failed: %d", error);
1516 return error;
1517 }
1518 /* hold an extra reference for nx_upipe */
1519 u->nup_pna = pna;
1520 na_retain_locked(pna);
1521 SK_DF(SK_VERB_USER_PIPE, "created parent: \"%s\" (0x%llx)",
1522 pna->na_name, SK_KVA(pna));
1523 }
1524
1525 /* next, lookup the pipe id in the parent list */
1526 req = NULL;
1527 pipe_id = chr->cr_pipe_id;
1528 mna = nx_upipe_find(pna, pipe_id);
1529 if (mna != NULL) {
1530 if (mna->pna_role == ep) {
1531 SK_DF(SK_VERB_USER_PIPE,
1532 "found pipe_id %u directly at slot %u",
1533 pipe_id, mna->pna_parent_slot);
1534 req = mna;
1535 } else {
1536 SK_DF(SK_VERB_USER_PIPE,
1537 "found pipe_id %u indirectly at slot %u",
1538 pipe_id, mna->pna_parent_slot);
1539 req = mna->pna_peer;
1540 }
1541 /*
1542 * The pipe we have found already holds a ref to the parent,
1543 * so we need to drop the one we got from above.
1544 */
1545 (void) na_release_locked(pna);
1546 goto found;
1547 }
1548 SK_DF(SK_VERB_USER_PIPE,
1549 "pipe_id %u not found, create %u", pipe_id, create);
1550 if (!create) {
1551 error = ENODEV;
1552 goto put_out;
1553 }
1554 /*
1555 * We create both master and slave.
1556 * The endpoint we were asked for holds a reference to
1557 * the other one.
1558 */
1559 mna = na_upipe_alloc(Z_WAITOK);
1560
1561 ASSERT(mna->pna_up.na_type == NA_USER_PIPE);
1562 ASSERT(mna->pna_up.na_free == na_upipe_free);
1563
1564 (void) snprintf(mna->pna_up.na_name, sizeof(mna->pna_up.na_name),
1565 "%s{%u", pna->na_name, pipe_id);
1566 uuid_generate_random(mna->pna_up.na_uuid);
1567
1568 mna->pna_id = pipe_id;
1569 mna->pna_role = CH_ENDPOINT_USER_PIPE_MASTER;
1570 mna->pna_parent = pna;
1571 mna->pna_up.na_txsync = nx_upipe_na_txsync;
1572 mna->pna_up.na_rxsync = nx_upipe_na_rxsync;
1573 mna->pna_up.na_activate = nx_upipe_na_activate;
1574 mna->pna_up.na_dtor = nx_upipe_na_dtor;
1575 mna->pna_up.na_krings_create = nx_upipe_na_krings_create;
1576 mna->pna_up.na_krings_delete = nx_upipe_na_krings_delete;
1577 mna->pna_up.na_arena = pna->na_arena;
1578 skmem_arena_retain((&mna->pna_up)->na_arena);
1579 atomic_bitset_32(&mna->pna_up.na_flags, NAF_MEM_LOANED);
1580 *(nexus_meta_type_t *)(uintptr_t)&mna->pna_up.na_md_type =
1581 pna->na_md_type;
1582 *(nexus_meta_subtype_t *)(uintptr_t)&mna->pna_up.na_md_subtype =
1583 pna->na_md_subtype;
1584
1585 *(nexus_stats_type_t *)(uintptr_t)&mna->pna_up.na_stats_type =
1586 NEXUS_STATS_TYPE_INVALID;
1587 *(uint32_t *)(uintptr_t)&mna->pna_up.na_flowadv_max =
1588 nxp->nxp_flowadv_max;
1589 ASSERT(mna->pna_up.na_flowadv_max == 0 ||
1590 skmem_arena_nexus(mna->pna_up.na_arena)->arn_flowadv_obj != NULL);
1591
1592 /*
1593 * Parent adapter parameters must match the nexus provider's by the
1594 * time we get here, since na_find() above shouldn't return
1595 * one otherwise.
1596 */
1597 na_set_nrings(&mna->pna_up, NR_TX, nxp->nxp_tx_rings);
1598 na_set_nrings(&mna->pna_up, NR_RX, nxp->nxp_rx_rings);
1599 na_set_nslots(&mna->pna_up, NR_TX, nxp->nxp_tx_slots);
1600 na_set_nslots(&mna->pna_up, NR_RX, nxp->nxp_rx_slots);
1601 ASSERT(na_get_nrings(&mna->pna_up, NR_TX) == na_get_nrings(pna, NR_TX));
1602 ASSERT(na_get_nrings(&mna->pna_up, NR_RX) == na_get_nrings(pna, NR_RX));
1603 ASSERT(na_get_nslots(&mna->pna_up, NR_TX) == na_get_nslots(pna, NR_TX));
1604 ASSERT(na_get_nslots(&mna->pna_up, NR_RX) == na_get_nslots(pna, NR_RX));
1605
1606 na_attach_common(&mna->pna_up, nx, &nx_upipe_prov_s);
1607
1608 /* register the master with the parent */
1609 error = nx_upipe_na_add(pna, mna);
1610 if (error != 0) {
1611 goto free_mna;
1612 }
1613
1614 /* create the slave */
1615 sna = na_upipe_alloc(Z_WAITOK);
1616
1617 /* most fields are the same, copy from master and then fix */
1618 bcopy(mna, sna, sizeof(*sna));
1619 skmem_arena_retain((&sna->pna_up)->na_arena);
1620 atomic_bitset_32(&sna->pna_up.na_flags, NAF_MEM_LOANED);
1621
1622 ASSERT(sna->pna_up.na_type == NA_USER_PIPE);
1623 ASSERT(sna->pna_up.na_free == na_upipe_free);
1624
1625 (void) snprintf(sna->pna_up.na_name, sizeof(sna->pna_up.na_name),
1626 "%s}%d", pna->na_name, pipe_id);
1627 uuid_generate_random(sna->pna_up.na_uuid);
1628
1629 sna->pna_role = CH_ENDPOINT_USER_PIPE_SLAVE;
1630 na_attach_common(&sna->pna_up, nx, &nx_upipe_prov_s);
1631
1632 /* join the two endpoints */
1633 mna->pna_peer = sna;
1634 sna->pna_peer = mna;
1635
1636 /*
1637 * We already have a reference to the parent, but we
1638 * need another one for the other endpoint we created
1639 */
1640 na_retain_locked(pna);
1641
1642 if ((chr->cr_mode & CHMODE_DEFUNCT_OK) != 0) {
1643 atomic_bitset_32(&pna->na_flags, NAF_DEFUNCT_OK);
1644 }
1645
1646 if (ep == CH_ENDPOINT_USER_PIPE_MASTER) {
1647 req = mna;
1648 mna->pna_peer_ref = TRUE;
1649 na_retain_locked(&sna->pna_up);
1650 } else {
1651 req = sna;
1652 sna->pna_peer_ref = TRUE;
1653 na_retain_locked(&mna->pna_up);
1654 }
1655
1656 /* parent adapter now has two users (mna and sna) */
1657 u->nup_pna_users += 2;
1658
1659 #if SK_LOG
1660 SK_DF(SK_VERB_USER_PIPE, "created master 0x%llx and slave 0x%llx",
1661 SK_KVA(mna), SK_KVA(sna));
1662 SK_DF(SK_VERB_USER_PIPE, "mna: \"%s\"", mna->pna_up.na_name);
1663 SK_DF(SK_VERB_USER_PIPE, " UUID: %s",
1664 sk_uuid_unparse(mna->pna_up.na_uuid, uuidstr));
1665 SK_DF(SK_VERB_USER_PIPE, " nx: 0x%llx (\"%s\":\"%s\")",
1666 SK_KVA(mna->pna_up.na_nx), NX_DOM(mna->pna_up.na_nx)->nxdom_name,
1667 NX_DOM_PROV(mna->pna_up.na_nx)->nxdom_prov_name);
1668 SK_DF(SK_VERB_USER_PIPE, " flags: 0x%b",
1669 mna->pna_up.na_flags, NAF_BITS);
1670 SK_DF(SK_VERB_USER_PIPE, " flowadv_max: %u",
1671 mna->pna_up.na_flowadv_max);
1672 SK_DF(SK_VERB_USER_PIPE, " rings: tx %u rx %u",
1673 na_get_nrings(&mna->pna_up, NR_TX),
1674 na_get_nrings(&mna->pna_up, NR_RX));
1675 SK_DF(SK_VERB_USER_PIPE, " slots: tx %u rx %u",
1676 na_get_nslots(&mna->pna_up, NR_TX),
1677 na_get_nslots(&mna->pna_up, NR_RX));
1678 SK_DF(SK_VERB_USER_PIPE, " next_pipe: %u", mna->pna_up.na_next_pipe);
1679 SK_DF(SK_VERB_USER_PIPE, " max_pipes: %u", mna->pna_up.na_max_pipes);
1680 SK_DF(SK_VERB_USER_PIPE, " parent: \"%s\"",
1681 mna->pna_parent->na_name);
1682 SK_DF(SK_VERB_USER_PIPE, " id: %u", mna->pna_id);
1683 SK_DF(SK_VERB_USER_PIPE, " role: %u", mna->pna_role);
1684 SK_DF(SK_VERB_USER_PIPE, " peer_ref: %u", mna->pna_peer_ref);
1685 SK_DF(SK_VERB_USER_PIPE, " parent_slot: %u", mna->pna_parent_slot);
1686 SK_DF(SK_VERB_USER_PIPE, "sna: \"%s\"", sna->pna_up.na_name);
1687 SK_DF(SK_VERB_USER_PIPE, " UUID: %s",
1688 sk_uuid_unparse(sna->pna_up.na_uuid, uuidstr));
1689 SK_DF(SK_VERB_USER_PIPE, " nx: 0x%llx (\"%s\":\"%s\")",
1690 SK_KVA(sna->pna_up.na_nx), NX_DOM(sna->pna_up.na_nx)->nxdom_name,
1691 NX_DOM_PROV(sna->pna_up.na_nx)->nxdom_prov_name);
1692 SK_DF(SK_VERB_USER_PIPE, " flags: 0x%b",
1693 sna->pna_up.na_flags, NAF_BITS);
1694 SK_DF(SK_VERB_USER_PIPE, " flowadv_max: %u",
1695 sna->pna_up.na_flowadv_max);
1696 SK_DF(SK_VERB_USER_PIPE, " rings: tx %u rx %u",
1697 na_get_nrings(&sna->pna_up, NR_TX),
1698 na_get_nrings(&sna->pna_up, NR_RX));
1699 SK_DF(SK_VERB_USER_PIPE, " slots: tx %u rx %u",
1700 na_get_nslots(&sna->pna_up, NR_TX),
1701 na_get_nslots(&sna->pna_up, NR_RX));
1702 SK_DF(SK_VERB_USER_PIPE, " next_pipe: %u", sna->pna_up.na_next_pipe);
1703 SK_DF(SK_VERB_USER_PIPE, " max_pipes: %u", sna->pna_up.na_max_pipes);
1704 SK_DF(SK_VERB_USER_PIPE, " parent: \"%s\"",
1705 sna->pna_parent->na_name);
1706 SK_DF(SK_VERB_USER_PIPE, " id: %u", sna->pna_id);
1707 SK_DF(SK_VERB_USER_PIPE, " role: %u", sna->pna_role);
1708 SK_DF(SK_VERB_USER_PIPE, " peer_ref: %u", sna->pna_peer_ref);
1709 SK_DF(SK_VERB_USER_PIPE, " parent_slot: %u", sna->pna_parent_slot);
1710 #endif /* SK_LOG */
1711
1712 found:
1713
1714 SK_DF(SK_VERB_USER_PIPE, "pipe_id %u role %s at 0x%llx", pipe_id,
1715 (req->pna_role == CH_ENDPOINT_USER_PIPE_MASTER ?
1716 "master" : "slave"), SK_KVA(req));
1717 if ((chr->cr_mode & CHMODE_DEFUNCT_OK) == 0) {
1718 atomic_bitclear_32(&pna->na_flags, NAF_DEFUNCT_OK);
1719 }
1720 *na = &req->pna_up;
1721 na_retain_locked(*na);
1722
1723 /*
1724 * Keep the reference to the parent; it will be released
1725 * by the adapter's destructor.
1726 */
1727 return 0;
1728
1729 free_mna:
1730 if (mna->pna_up.na_arena != NULL) {
1731 skmem_arena_release((&mna->pna_up)->na_arena);
1732 mna->pna_up.na_arena = NULL;
1733 }
1734 NA_FREE(&mna->pna_up);
1735 put_out:
1736 (void) na_release_locked(pna);
1737 return error;
1738 }
1739
1740 static struct nx_upipe *
nx_upipe_alloc(zalloc_flags_t how)1741 nx_upipe_alloc(zalloc_flags_t how)
1742 {
1743 struct nx_upipe *u;
1744
1745 SK_LOCK_ASSERT_HELD();
1746
1747 u = zalloc_flags(nx_upipe_zone, how | Z_ZERO);
1748 if (u) {
1749 SK_DF(SK_VERB_MEM, "upipe 0x%llx ALLOC", SK_KVA(u));
1750 }
1751 return u;
1752 }
1753
1754 static void
nx_upipe_free(struct nx_upipe * u)1755 nx_upipe_free(struct nx_upipe *u)
1756 {
1757 ASSERT(u->nup_pna == NULL);
1758 ASSERT(u->nup_pna_users == 0);
1759 ASSERT(u->nup_cli_nxb == NULL);
1760 ASSERT(u->nup_srv_nxb == NULL);
1761
1762 SK_DF(SK_VERB_MEM, "upipe 0x%llx FREE", SK_KVA(u));
1763 zfree(nx_upipe_zone, u);
1764 }
1765