/*
 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/upipe/nx_user_pipe.h>

#define NX_UPIPE_RINGSIZE    128    /* default ring size */
#define NX_UPIPE_MAXRINGS    NX_MAX_NUM_RING_PAIR
#define NX_UPIPE_MINSLOTS    2      /* XXX same as above */
#define NX_UPIPE_MAXSLOTS    4096   /* XXX same as above */
#define NX_UPIPE_BUFSIZE     (2 * 1024)
#define NX_UPIPE_MINBUFSIZE  1024
#define NX_UPIPE_MAXBUFSIZE  (16 * 1024)
#define NX_UPIPE_MHINTS      NEXUS_MHINTS_NORMAL
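/*
 * The defaults above (128-slot rings, 2 KB buffers) seed the
 * def/min/max tunables in nx_upipe_dom_s below.
 */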

static int nx_upipe_na_alloc(struct nexus_adapter *, uint32_t);
static struct nexus_upipe_adapter *nx_upipe_find(struct nexus_adapter *,
    uint32_t);
static int nx_upipe_na_add(struct nexus_adapter *,
    struct nexus_upipe_adapter *);
static void nx_upipe_na_remove(struct nexus_adapter *,
    struct nexus_upipe_adapter *);
static int nx_upipe_na_txsync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int nx_upipe_na_txsync_locked(struct __kern_channel_ring *,
    struct proc *, uint32_t, int *, boolean_t);
static int nx_upipe_na_rxsync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int nx_upipe_na_krings_create(struct nexus_adapter *,
    struct kern_channel *);
static int nx_upipe_na_activate(struct nexus_adapter *, na_activate_mode_t);
static void nx_upipe_na_krings_delete(struct nexus_adapter *,
    struct kern_channel *, boolean_t);
static void nx_upipe_na_dtor(struct nexus_adapter *);

static void nx_upipe_dom_init(struct nxdom *);
static void nx_upipe_dom_terminate(struct nxdom *);
static void nx_upipe_dom_fini(struct nxdom *);
static int nx_upipe_dom_bind_port(struct kern_nexus *, nexus_port_t *,
    struct nxbind *, void *);
static int nx_upipe_dom_unbind_port(struct kern_nexus *, nexus_port_t);
static int nx_upipe_dom_connect(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, struct chreq *,
    struct kern_channel *, struct nxbind *, struct proc *);
static void nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *);
static void nx_upipe_dom_defunct(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, struct proc *);
static void nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct kern_channel *, boolean_t);

static int nx_upipe_prov_init(struct kern_nexus_domain_provider *);
static int nx_upipe_prov_params_adjust(
    const struct kern_nexus_domain_provider *, const struct nxprov_params *,
    struct nxprov_adjusted_params *);
static int nx_upipe_prov_params(struct kern_nexus_domain_provider *,
    const uint32_t, const struct nxprov_params *, struct nxprov_params *,
    struct skmem_region_params[SKMEM_REGIONS], uint32_t);
static int nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *,
    struct kern_nexus *, struct nexus_adapter *);
static void nx_upipe_prov_fini(struct kern_nexus_domain_provider *);
static int nx_upipe_prov_nx_ctor(struct kern_nexus *);
static void nx_upipe_prov_nx_dtor(struct kern_nexus *);

static struct nexus_upipe_adapter *na_upipe_alloc(zalloc_flags_t);
static void na_upipe_free(struct nexus_adapter *);

static struct nx_upipe *nx_upipe_alloc(zalloc_flags_t);
static void nx_upipe_free(struct nx_upipe *);

#if (DEVELOPMENT || DEBUG)
static uint32_t nx_upipe_mhints = 0;
SYSCTL_NODE(_kern_skywalk, OID_AUTO, upipe, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk upipe tuning");
SYSCTL_UINT(_kern_skywalk_upipe, OID_AUTO, nx_mhints,
    CTLFLAG_RW | CTLFLAG_LOCKED, &nx_upipe_mhints, 0,
    "upipe nexus memory usage hints");
#endif /* (DEVELOPMENT || DEBUG) */
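/*
 * On DEVELOPMENT/DEBUG kernels, nx_upipe_mhints above is reachable via
 * sysctl as kern.skywalk.upipe.nx_mhints; a non-zero value overrides
 * the nexus memory-usage hints in nx_upipe_prov_params() below.
 */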

struct nxdom nx_upipe_dom_s = {
    .nxdom_prov_head =
        STAILQ_HEAD_INITIALIZER(nx_upipe_dom_s.nxdom_prov_head),
    .nxdom_type = NEXUS_TYPE_USER_PIPE,
    .nxdom_md_type = NEXUS_META_TYPE_QUANTUM,
    .nxdom_md_subtype = NEXUS_META_SUBTYPE_PAYLOAD,
    .nxdom_name = "upipe",
    .nxdom_ports = {
        .nb_def = 2,
        .nb_min = 2,
        .nb_max = 2,
    },
    .nxdom_tx_rings = {
        .nb_def = 1,
        .nb_min = 1,
        .nb_max = NX_UPIPE_MAXRINGS,
    },
    .nxdom_rx_rings = {
        .nb_def = 1,
        .nb_min = 1,
        .nb_max = NX_UPIPE_MAXRINGS,
    },
    .nxdom_tx_slots = {
        .nb_def = NX_UPIPE_RINGSIZE,
        .nb_min = NX_UPIPE_MINSLOTS,
        .nb_max = NX_UPIPE_MAXSLOTS,
    },
    .nxdom_rx_slots = {
        .nb_def = NX_UPIPE_RINGSIZE,
        .nb_min = NX_UPIPE_MINSLOTS,
        .nb_max = NX_UPIPE_MAXSLOTS,
    },
    .nxdom_buf_size = {
        .nb_def = NX_UPIPE_BUFSIZE,
        .nb_min = NX_UPIPE_MINBUFSIZE,
        .nb_max = NX_UPIPE_MAXBUFSIZE,
    },
    .nxdom_large_buf_size = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = 0,
    },
    .nxdom_meta_size = {
        .nb_def = NX_METADATA_OBJ_MIN_SZ,
        .nb_min = NX_METADATA_OBJ_MIN_SZ,
        .nb_max = NX_METADATA_USR_MAX_SZ,
    },
    .nxdom_stats_size = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_STATS_MAX_SZ,
    },
    .nxdom_pipes = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_UPIPE_MAXPIPES,
    },
    .nxdom_mhints = {
        .nb_def = NX_UPIPE_MHINTS,
        .nb_min = NEXUS_MHINTS_NORMAL,
        .nb_max = (NEXUS_MHINTS_NORMAL | NEXUS_MHINTS_WILLNEED |
            NEXUS_MHINTS_LOWLATENCY | NEXUS_MHINTS_HIUSE),
    },
    .nxdom_flowadv_max = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_FLOWADV_MAX,
    },
    .nxdom_nexusadv_size = {
        .nb_def = 0,
        .nb_min = 0,
        .nb_max = NX_NEXUSADV_MAX_SZ,
    },
    .nxdom_capabilities = {
        .nb_def = NXPCAP_USER_CHANNEL,
        .nb_min = NXPCAP_USER_CHANNEL,
        .nb_max = NXPCAP_USER_CHANNEL,
    },
    .nxdom_qmap = {
        .nb_def = NEXUS_QMAP_TYPE_INVALID,
        .nb_min = NEXUS_QMAP_TYPE_INVALID,
        .nb_max = NEXUS_QMAP_TYPE_INVALID,
    },
    .nxdom_max_frags = {
        .nb_def = NX_PBUF_FRAGS_DEFAULT,
        .nb_min = NX_PBUF_FRAGS_MIN,
        .nb_max = NX_PBUF_FRAGS_DEFAULT,
    },
    .nxdom_init = nx_upipe_dom_init,
    .nxdom_terminate = nx_upipe_dom_terminate,
    .nxdom_fini = nx_upipe_dom_fini,
    .nxdom_find_port = NULL,
    .nxdom_port_is_reserved = NULL,
    .nxdom_bind_port = nx_upipe_dom_bind_port,
    .nxdom_unbind_port = nx_upipe_dom_unbind_port,
    .nxdom_connect = nx_upipe_dom_connect,
    .nxdom_disconnect = nx_upipe_dom_disconnect,
    .nxdom_defunct = nx_upipe_dom_defunct,
    .nxdom_defunct_finalize = nx_upipe_dom_defunct_finalize,
};

static struct kern_nexus_domain_provider nx_upipe_prov_s = {
    .nxdom_prov_name = NEXUS_PROVIDER_USER_PIPE,
    .nxdom_prov_flags = NXDOMPROVF_DEFAULT,
    .nxdom_prov_cb = {
        .dp_cb_init = nx_upipe_prov_init,
        .dp_cb_fini = nx_upipe_prov_fini,
        .dp_cb_params = nx_upipe_prov_params,
        .dp_cb_mem_new = nx_upipe_prov_mem_new,
        .dp_cb_config = NULL,
        .dp_cb_nx_ctor = nx_upipe_prov_nx_ctor,
        .dp_cb_nx_dtor = nx_upipe_prov_nx_dtor,
        .dp_cb_nx_mem_info = NULL,
        .dp_cb_nx_mib_get = NULL,
        .dp_cb_nx_stop = NULL,
    },
};
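/*
 * Note: the NULL callbacks above (config, mem_info, mib_get, stop) are
 * optional hooks that the user pipe provider does not need.
 */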

static SKMEM_TYPE_DEFINE(na_upipe_zone, struct nexus_upipe_adapter);

static SKMEM_TYPE_DEFINE(nx_upipe_zone, struct nx_upipe);

#define SKMEM_TAG_PIPES "com.apple.skywalk.pipes"
static SKMEM_TAG_DEFINE(skmem_tag_pipes, SKMEM_TAG_PIPES);

static void
nx_upipe_dom_init(struct nxdom *nxdom)
{
    SK_LOCK_ASSERT_HELD();
    ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));

    (void) nxdom_prov_add(nxdom, &nx_upipe_prov_s);
}

static void
nx_upipe_dom_terminate(struct nxdom *nxdom)
{
    struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;

    STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
        nxdom_prov_link, tnxdp) {
        (void) nxdom_prov_del(nxdom_prov);
    }
}

static void
nx_upipe_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
}

static int
nx_upipe_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
    SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
    return 0;
}

static int
nx_upipe_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
    const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
{
#pragma unused(nxdom_prov, nxp)
    /*
     * A user pipe requires twice the number of rings, and the
     * ring counts must be symmetrical.
     */
    if (*(adj->adj_tx_rings) != *(adj->adj_rx_rings)) {
        SK_ERR("rings: tx (%u) != rx (%u)", *(adj->adj_tx_rings),
            *(adj->adj_rx_rings));
        return EINVAL;
    }
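    /*
     * E.g. a provider configured with 1 TX and 1 RX ring ends up
     * with 2 of each below; the doubled counts account for the two
     * endpoints of the pipe, whose krings are cross-linked in
     * nx_upipe_na_rings_create().
     */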
    *(adj->adj_tx_rings) *= 2;
    *(adj->adj_rx_rings) *= 2;
    return 0;
}

static int
nx_upipe_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
    const uint32_t req, const struct nxprov_params *nxp0,
    struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS],
    uint32_t pp_region_config_flags)
{
    struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
    int err;

    err = nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
        nxdom, nxdom, nxdom, pp_region_config_flags,
        nx_upipe_prov_params_adjust);
#if (DEVELOPMENT || DEBUG)
    /* sysctl override */
    if ((err == 0) && (nx_upipe_mhints != 0)) {
        nxp->nxp_mhints = nx_upipe_mhints;
    }
#endif /* (DEVELOPMENT || DEBUG) */
    return err;
}

static int
nx_upipe_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct nexus_adapter *na)
{
#pragma unused(nxdom_prov)
    int err = 0;

    SK_DF(SK_VERB_USER_PIPE,
        "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
        NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
        SK_KVA(na));

    ASSERT(na->na_arena == NULL);
    ASSERT(NX_USER_CHANNEL_PROV(nx));
    /*
     * The underlying nexus adapters already share the same memory
     * allocator, and thus we don't care about storing the pp in
     * the nexus.
     *
     * This means that clients calling kern_nexus_get_pbufpool()
     * will get NULL, but this is fine since we don't expose the
     * user pipe to external kernel clients.
     */
    na->na_arena = skmem_arena_create_for_nexus(na,
        NX_PROV(nx)->nxprov_region_params, NULL, NULL, 0, NULL, &err);
    ASSERT(na->na_arena != NULL || err != 0);

    return err;
}

static void
nx_upipe_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
    SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
}

static int
nx_upipe_prov_nx_ctor(struct kern_nexus *nx)
{
    SK_LOCK_ASSERT_HELD();
    ASSERT(nx->nx_arg == NULL);

    SK_D("nexus 0x%llx (%s)", SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name);

    nx->nx_arg = nx_upipe_alloc(Z_WAITOK);
    SK_D("create new upipe 0x%llx for nexus 0x%llx",
        SK_KVA(NX_UPIPE_PRIVATE(nx)), SK_KVA(nx));

    return 0;
}

static void
nx_upipe_prov_nx_dtor(struct kern_nexus *nx)
{
    struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);

    SK_LOCK_ASSERT_HELD();

    SK_D("nexus 0x%llx (%s) upipe 0x%llx", SK_KVA(nx),
        NX_DOM_PROV(nx)->nxdom_prov_name, SK_KVA(u));

    if (u->nup_cli_nxb != NULL) {
        nxb_free(u->nup_cli_nxb);
        u->nup_cli_nxb = NULL;
    }
    if (u->nup_srv_nxb != NULL) {
        nxb_free(u->nup_srv_nxb);
        u->nup_srv_nxb = NULL;
    }

    SK_DF(SK_VERB_USER_PIPE, "marking upipe 0x%llx as free", SK_KVA(u));
    nx_upipe_free(u);
    nx->nx_arg = NULL;
}

static struct nexus_upipe_adapter *
na_upipe_alloc(zalloc_flags_t how)
{
    struct nexus_upipe_adapter *pna;

    _CASSERT(offsetof(struct nexus_upipe_adapter, pna_up) == 0);
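    /*
     * The assertion above guarantees pna_up is the first member,
     * so the casts between struct nexus_adapter * and
     * struct nexus_upipe_adapter * used throughout this file
     * are valid.
     */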

    pna = zalloc_flags(na_upipe_zone, how | Z_ZERO);
    if (pna) {
        pna->pna_up.na_type = NA_USER_PIPE;
        pna->pna_up.na_free = na_upipe_free;
    }
    return pna;
}

static void
na_upipe_free(struct nexus_adapter *na)
{
    struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;

    ASSERT(pna->pna_up.na_refcount == 0);
    SK_DF(SK_VERB_MEM, "pna 0x%llx FREE", SK_KVA(pna));
    bzero(pna, sizeof(*pna));
    zfree(na_upipe_zone, pna);
}

static int
nx_upipe_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
    struct nxbind *nxb0, void *info)
{
#pragma unused(info)
    struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
    struct nxbind *nxb = NULL;
    int error = 0;

    ASSERT(nx_port != NULL);
    ASSERT(nxb0 != NULL);

    switch (*nx_port) {
    case NEXUS_PORT_USER_PIPE_CLIENT:
    case NEXUS_PORT_USER_PIPE_SERVER:
        if ((*nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
            u->nup_cli_nxb != NULL) ||
            (*nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
            u->nup_srv_nxb != NULL)) {
            error = EEXIST;
            break;
        }

        nxb = nxb_alloc(Z_WAITOK);
        nxb_move(nxb0, nxb);
        if (*nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
            u->nup_cli_nxb = nxb;
        } else {
            u->nup_srv_nxb = nxb;
        }

        ASSERT(error == 0);
        break;

    default:
        error = EDOM;
        break;
    }

    return error;
}

static int
nx_upipe_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
{
    struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
    struct nxbind *nxb = NULL;
    int error = 0;

    ASSERT(nx_port != NEXUS_PORT_ANY);

    switch (nx_port) {
    case NEXUS_PORT_USER_PIPE_CLIENT:
    case NEXUS_PORT_USER_PIPE_SERVER:
        if ((nx_port == NEXUS_PORT_USER_PIPE_CLIENT &&
            u->nup_cli_nxb == NULL) ||
            (nx_port == NEXUS_PORT_USER_PIPE_SERVER &&
            u->nup_srv_nxb == NULL)) {
            error = ENOENT;
            break;
        }

        if (nx_port == NEXUS_PORT_USER_PIPE_CLIENT) {
            nxb = u->nup_cli_nxb;
            u->nup_cli_nxb = NULL;
        } else {
            nxb = u->nup_srv_nxb;
            u->nup_srv_nxb = NULL;
        }
        nxb_free(nxb);
        ASSERT(error == 0);
        break;

    default:
        error = EDOM;
        break;
    }

    return error;
}

static int
nx_upipe_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
#pragma unused(nxdom_prov)
    nexus_port_t port = chr->cr_port;
    int err = 0;

    SK_LOCK_ASSERT_HELD();

    ASSERT(NX_DOM_PROV(nx) == nxdom_prov);
    ASSERT(nx->nx_prov->nxprov_params->nxp_type ==
        nxdom_prov->nxdom_prov_dom->nxdom_type &&
        nx->nx_prov->nxprov_params->nxp_type == NEXUS_TYPE_USER_PIPE);

    /*
     * XXX: channel in user packet pool mode is not supported for
     * user-pipe for now.
     */
    if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
        SK_ERR("User packet pool mode not supported for upipe");
        err = ENOTSUP;
        goto done;
    }

    if (chr->cr_mode & CHMODE_EVENT_RING) {
        SK_ERR("event ring is not supported for upipe");
        err = ENOTSUP;
        goto done;
    }

    if (chr->cr_mode & CHMODE_LOW_LATENCY) {
        SK_ERR("low latency is not supported for upipe");
        err = ENOTSUP;
        goto done;
    }

    if (port == NEXUS_PORT_USER_PIPE_SERVER) {
        chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_MASTER;
    } else if (port == NEXUS_PORT_USER_PIPE_CLIENT) {
        chr->cr_real_endpoint = CH_ENDPOINT_USER_PIPE_SLAVE;
    } else {
        err = EINVAL;
        goto done;
    }

    chr->cr_endpoint = chr->cr_real_endpoint;
    chr->cr_ring_set = RING_SET_DEFAULT;
    chr->cr_pipe_id = 0;
    (void) snprintf(chr->cr_name, sizeof(chr->cr_name), "upipe:%llu:%.*s",
        nx->nx_id, (int)nx->nx_prov->nxprov_params->nxp_namelen,
        nx->nx_prov->nxprov_params->nxp_name);

    err = na_connect(nx, ch, chr, ch0, nxb, p);
done:
    return err;
}

static void
nx_upipe_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nxdom_prov)
    SK_LOCK_ASSERT_HELD();

    SK_D("channel 0x%llx -!- nexus 0x%llx (%s:\"%s\":%u:%d)", SK_KVA(ch),
        SK_KVA(nx), nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
        ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);

    na_disconnect(nx, ch);
    /*
     * Set NXF_REJECT on the nexus which would cause any channel on the
     * peer adapter to cease to function.
     */
    if (NX_PROV(nx)->nxprov_params->nxp_reject_on_close) {
        atomic_bitset_32(&nx->nx_flags, NXF_REJECT);
    }
}

static void
nx_upipe_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
{
#pragma unused(nxdom_prov, nx)
    struct nexus_adapter *na = ch->ch_na;
    struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
    ring_id_t qfirst = ch->ch_first[NR_TX];
    ring_id_t qlast = ch->ch_last[NR_TX];
    uint32_t i;

    LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
    ASSERT(!(ch->ch_flags & CHANF_KERNEL));
    ASSERT(na->na_type == NA_USER_PIPE);

    /*
     * Inform the peer receiver thread in nx_upipe_na_rxsync() or the
     * peer transmit thread in nx_upipe_na_txsync() about
     * this endpoint going defunct. We utilize the TX ring's
     * lock for serialization, since that is what's being used
     * by the receiving endpoint.
     */
    for (i = qfirst; i < qlast; i++) {
        /*
         * Maintain lock ordering between the two channels of the
         * user pipe: both endpoints acquire the pair of kring
         * locks in the same global order (the master's TX kring
         * first, then the slave's, via ckr_pipe), so two channels
         * going defunct concurrently cannot deadlock.
         */
        if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
            (void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
            (void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
        } else {
            (void) kr_enter(NAKR(na, NR_RX)[i].ckr_pipe, TRUE);
            (void) kr_enter(&NAKR(na, NR_TX)[i], TRUE);
        }
    }

    na_ch_rings_defunct(ch, p);

    for (i = qfirst; i < qlast; i++) {
        if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
            (void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
            (void) kr_exit(&NAKR(na, NR_TX)[i]);
        } else {
            (void) kr_exit(&NAKR(na, NR_TX)[i]);
            (void) kr_exit(NAKR(na, NR_RX)[i].ckr_pipe);
        }
    }
}

static void
nx_upipe_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
{
#pragma unused(nxdom_prov)
    struct nexus_upipe_adapter *pna =
        (struct nexus_upipe_adapter *)ch->ch_na;

    if (!locked) {
        SK_LOCK_ASSERT_NOTHELD();
        SK_LOCK();
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
    } else {
        SK_LOCK_ASSERT_HELD();
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
    }

    ASSERT(!(ch->ch_flags & CHANF_KERNEL));
    ASSERT(ch->ch_na->na_type == NA_USER_PIPE);

    /*
     * At this point, we know that the arena shared by the master and
     * slave adapters has no more valid mappings on the channels opened
     * to them. We need to invoke na_defunct() on both adapters to
     * release any remaining slots attached to their rings.
     *
     * Note that the 'ch' that we pass in here is irrelevant as we
     * don't support user packet pool for user pipe.
     */
    na_defunct(nx, ch, &pna->pna_up, locked);
    if (pna->pna_peer != NULL) {
        na_defunct(nx, ch, &pna->pna_peer->pna_up, locked);
    }

    /*
     * And if their parent adapter (the memory owner) is a pseudo
     * nexus adapter that we initially created in nx_upipe_na_find(),
     * invoke na_defunct() on it now to do the final teardown on
     * the arena.
     */
    if (pna->pna_parent->na_type == NA_PSEUDO) {
        na_defunct(nx, ch, pna->pna_parent, locked);
    }

    SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d)",
        ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx),
        nxdom_prov->nxdom_prov_name, ch->ch_na->na_name,
        ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id);

    if (!locked) {
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_NOTOWNED);
        SK_UNLOCK();
    } else {
        LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
        SK_LOCK_ASSERT_HELD();
    }
}

/* allocate the pipe array in the parent adapter */
static int
nx_upipe_na_alloc(struct nexus_adapter *na, uint32_t npipes)
{
    struct nexus_upipe_adapter **npa;

    if (npipes <= na->na_max_pipes) {
        /* we already have more entries than requested */
        return 0;
    }
    if (npipes < na->na_next_pipe || npipes > NX_UPIPE_MAXPIPES) {
        return EINVAL;
    }

    npa = sk_realloc_type_array(struct nexus_upipe_adapter *,
        na->na_max_pipes, npipes, na->na_pipes, Z_WAITOK, skmem_tag_pipes);
    if (npa == NULL) {
        return ENOMEM;
    }

    na->na_pipes = npa;
    na->na_max_pipes = npipes;

    return 0;
}

/* deallocate the pipe array in the parent adapter */
void
nx_upipe_na_dealloc(struct nexus_adapter *na)
{
    if (na->na_pipes) {
        if (na->na_next_pipe > 0) {
            SK_ERR("freeing non-empty pipe array for %s "
                "(%u dangling pipes)!", na->na_name,
                na->na_next_pipe);
        }
        sk_free_type_array(struct nexus_upipe_adapter *,
            na->na_max_pipes, na->na_pipes);
        na->na_pipes = NULL;
        na->na_max_pipes = 0;
        na->na_next_pipe = 0;
    }
}

/* find a pipe endpoint with the given id among the parent's pipes */
static struct nexus_upipe_adapter *
nx_upipe_find(struct nexus_adapter *parent, uint32_t pipe_id)
{
    uint32_t i;
    struct nexus_upipe_adapter *na;

    for (i = 0; i < parent->na_next_pipe; i++) {
        na = parent->na_pipes[i];
        if (na->pna_id == pipe_id) {
            return na;
        }
    }
    return NULL;
}

/* add a new pipe endpoint to the parent array */
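/*
 * The backing array grows by doubling (2, 4, 8, ...) through
 * nx_upipe_na_alloc(), so insertions cost amortized constant time.
 */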
static int
nx_upipe_na_add(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
{
    if (parent->na_next_pipe >= parent->na_max_pipes) {
        uint32_t npipes = parent->na_max_pipes ?
            2 * parent->na_max_pipes : 2;
        int error = nx_upipe_na_alloc(parent, npipes);
        if (error) {
            return error;
        }
    }

    parent->na_pipes[parent->na_next_pipe] = na;
    na->pna_parent_slot = parent->na_next_pipe;
    parent->na_next_pipe++;
    return 0;
}

/* remove the given pipe endpoint from the parent array */
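/*
 * Removal is O(1): the last entry is swapped into the vacated slot
 * and its pna_parent_slot is updated; ordering is not preserved.
 */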
static void
nx_upipe_na_remove(struct nexus_adapter *parent, struct nexus_upipe_adapter *na)
{
    uint32_t n;
    n = --parent->na_next_pipe;
    if (n != na->pna_parent_slot) {
        struct nexus_upipe_adapter **p =
            &parent->na_pipes[na->pna_parent_slot];
        *p = parent->na_pipes[n];
        (*p)->pna_parent_slot = na->pna_parent_slot;
    }
    parent->na_pipes[n] = NULL;
}

static int
nx_upipe_na_txsync(struct __kern_channel_ring *txkring, struct proc *p,
    uint32_t flags)
{
    struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
    volatile uint64_t *tx_tsync, *tx_tnote, *rx_tsync;
    int sent = 0, ret = 0;

    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
        "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
        "flags 0x%x -> kr \"%s\" (0x%llx) krflags 0x%b ring %u",
        sk_proc_name_address(p), sk_proc_pid(p), txkring->ckr_name,
        SK_KVA(txkring), txkring->ckr_flags, CKRF_BITS,
        txkring->ckr_ring_id, flags, rxkring->ckr_name, SK_KVA(rxkring),
        rxkring->ckr_flags, CKRF_BITS, rxkring->ckr_ring_id);

    /*
     * Serialize write access to the transmit ring, since another
     * thread coming down for rxsync might pick up pending slots.
     */
    ASSERT(txkring->ckr_owner == current_thread());

    /*
     * Record the time of sync and grab sync time of other side;
     * use atomic store and load since we're not holding the
     * lock used by the receive ring. This allows us to avoid
     * the potentially costly membar_sync().
     */
    /* deconst */
    tx_tsync = __DECONST(uint64_t *, &txkring->ckr_ring->ring_sync_time);
    atomic_set_64(tx_tsync, txkring->ckr_sync_time);

    /*
     * Read from the peer's kring, not its user ring; the peer's channel
     * may be defunct, in which case it's unsafe to access its user ring.
     */
    rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_sync_time);
    tx_tnote = __DECONST(uint64_t *, &txkring->ckr_ring->ring_notify_time);
    *tx_tnote = atomic_add_64_ov(rx_tsync, 0);

    if (__probable(txkring->ckr_rhead != txkring->ckr_khead)) {
        sent = nx_upipe_na_txsync_locked(txkring, p, flags,
            &ret, FALSE);
    }

    if (sent != 0) {
        (void) rxkring->ckr_na_notify(rxkring, p, 0);
    }

    return ret;
}

int
nx_upipe_na_txsync_locked(struct __kern_channel_ring *txkring, struct proc *p,
    uint32_t flags, int *ret, boolean_t rx)
{
#pragma unused(p, flags, rx)
    struct __kern_channel_ring *rxkring = txkring->ckr_pipe;
    const slot_idx_t lim_tx = txkring->ckr_lim;
    const slot_idx_t lim_rx = rxkring->ckr_lim;
    slot_idx_t j, k;
    int n, m, b, sent = 0;
    uint32_t byte_count = 0;
    int limit;      /* max # of slots to transfer */

    *ret = 0;

    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
        "%s(%d) kr \"%s\", kh %3u kt %3u | "
        "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
        sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
        txkring->ckr_ktail, txkring->ckr_rhead,
        txkring->ckr_rtail, rx ? "*" : "");
    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
        "%s(%d) kr \"%s\", kh %3u kt %3u | "
        "rh %3u rt %3u [pre%s]", sk_proc_name_address(p),
        sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
        rxkring->ckr_ktail, rxkring->ckr_rhead,
        rxkring->ckr_rtail, rx ? "*" : "");

    if (__improbable(KR_DROP(txkring) || KR_DROP(rxkring))) {
        *ret = ENXIO;
        goto done;
    }

    j = rxkring->ckr_ktail;    /* RX */
    k = txkring->ckr_khead;    /* TX */

    /* # of new tx slots */
    n = txkring->ckr_rhead - txkring->ckr_khead;
    if (n < 0) {
        n += txkring->ckr_num_slots;
    }
    limit = n;

    /* # of rx busy (unclaimed) slots */
    b = j - rxkring->ckr_khead;
    if (b < 0) {
        b += rxkring->ckr_num_slots;
    }

    /* # of rx avail free slots (subtract busy from max) */
    m = lim_rx - b;
    if (m < limit) {
        limit = m;
    }
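    /*
     * Illustrative numbers, assuming the usual convention that
     * ckr_lim == ckr_num_slots - 1: with 128 slots, lim_rx is 127;
     * if 100 slots are busy, at most 27 new ones can be moved, so a
     * full ring always remains distinguishable from an empty one.
     */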

    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
        "%s(%d) kr \"%s\" -> new %u, kr \"%s\" "
        "-> free %u", sk_proc_name_address(p), sk_proc_pid(p),
        txkring->ckr_name, n, rxkring->ckr_name, m);

    /* rxring is full, or nothing to send? */
    if (__improbable((sent = limit) == 0)) {
        SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
            "%s(%d) kr \"%s\" -> %s%s",
            sk_proc_name_address(p), sk_proc_pid(p), (n > m) ?
            rxkring->ckr_name : txkring->ckr_name, ((n > m) ?
            "no room avail" : "no new slots"),
            (rx ? " (lost race, ok)" : ""));
        goto done;
    }

    ASSERT(limit > 0);
    while (limit--) {
        struct __kern_slot_desc *ksd_tx = KR_KSD(txkring, k);
        struct __user_slot_desc *usd_tx = KR_USD(txkring, k);
        struct __kern_slot_desc *ksd_rx = KR_KSD(rxkring, j);
        struct __user_slot_desc *usd_rx = KR_USD(rxkring, j);
        struct __kern_quantum *kqum;

        kqum = ksd_tx->sd_qum;
        /*
         * Packets failing internalization should be dropped in
         * TX sync prologue.
         */
        ASSERT((kqum->qum_qflags & (QUM_F_INTERNALIZED |
            QUM_F_FINALIZED)) == (QUM_F_INTERNALIZED |
            QUM_F_FINALIZED));

        byte_count += kqum->qum_len;

        /*
         * Swap the slots.
         *
         * XXX: [email protected] -- this bypasses the slot attach/detach
         * interface, and needs to be changed when upipe adopts the
         * packet APIs. SD_SWAP() will perform a block copy of the
         * swap, and will readjust the kernel slot descriptor's sd_user
         * accordingly.
         */
        SD_SWAP(ksd_rx, usd_rx, ksd_tx, usd_tx);

        j = SLOT_NEXT(j, lim_rx);
        k = SLOT_NEXT(k, lim_tx);
    }

    kr_update_stats(rxkring, sent, byte_count);
    if (__improbable(kr_stat_enable != 0)) {
        txkring->ckr_stats = rxkring->ckr_stats;
    }

    /*
     * Make sure the slots are updated before ckr_ktail reach global
     * visibility, since we are not holding rx ring's kr_enter().
     */
    membar_sync();

    rxkring->ckr_ktail = j;
    txkring->ckr_khead = k;
    txkring->ckr_ktail = SLOT_PREV(k, lim_tx);

done:
    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
        "%s(%d) kr \"%s\", kh %3u kt %3u | "
        "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
        sk_proc_pid(p), txkring->ckr_name, txkring->ckr_khead,
        txkring->ckr_ktail, txkring->ckr_rhead,
        txkring->ckr_rtail, rx ? "*" : "");
    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_TX,
        "%s(%d) kr \"%s\", kh %3u kt %3u | "
        "rh %3u rt %3u [post%s]", sk_proc_name_address(p),
        sk_proc_pid(p), rxkring->ckr_name, rxkring->ckr_khead,
        rxkring->ckr_ktail, rxkring->ckr_rhead,
        rxkring->ckr_rtail, rx ? "*" : "");

    return sent;
}

static int
nx_upipe_na_rxsync(struct __kern_channel_ring *rxkring, struct proc *p,
    uint32_t flags)
{
#pragma unused(p)
    struct __kern_channel_ring *txkring = rxkring->ckr_pipe;
    volatile uint64_t *rx_tsync, *rx_tnote, *tx_tsync;
    const slot_idx_t lim_rx = rxkring->ckr_lim;
    int n;      /* new slots from transmit side */
    int m, b, ret = 0;
    uint32_t r;

    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
        "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u "
        "flags 0x%x <- kr \"%s\" (0x%llx) krflags 0x%b ring %u",
        sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
        SK_KVA(rxkring), rxkring->ckr_flags, CKRF_BITS,
        rxkring->ckr_ring_id, flags, txkring->ckr_name, SK_KVA(txkring),
        txkring->ckr_flags, CKRF_BITS, txkring->ckr_ring_id);

    ASSERT(rxkring->ckr_owner == current_thread());

    /* reclaim and get # of rx reclaimed slots */
    r = kr_reclaim(rxkring);

    /* # of rx busy (unclaimed) slots */
    b = rxkring->ckr_ktail - rxkring->ckr_khead;
    if (b < 0) {
        b += rxkring->ckr_num_slots;
    }

    /* # of rx avail free slots (subtract busy from max) */
    m = lim_rx - b;

    /*
     * Check if there are any new slots on the transmit ring; do this
     * first without acquiring that ring's ckr_qlock, and use
     * the memory barrier (paired with the second one in txsync).
     * If we missed the race we'd just pay the cost of acquiring
     * ckr_qlock and potentially returning from "internal txsync"
     * without anything to process, which is okay.
     */
    membar_sync();
    n = txkring->ckr_rhead - txkring->ckr_khead;
    if (n < 0) {
        n += txkring->ckr_num_slots;
    }

    SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
        "%s(%d) kr \"%s\" <- free %u, kr \"%s\" <- new %u",
        sk_proc_name_address(p), sk_proc_pid(p),
        rxkring->ckr_name, m, txkring->ckr_name, n);

    /*
     * Record the time of sync and grab sync time of other side;
     * use atomic store and load since we're not holding the
     * lock used by the receive ring. This allows us to avoid
     * the potentially costly membar_sync().
     */
    /* deconst */
    rx_tsync = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_sync_time);
    atomic_set_64(rx_tsync, rxkring->ckr_sync_time);

    /*
     * Read from the peer's kring, not its user ring; the peer's channel
     * may be defunct, in which case it's unsafe to access its user ring.
     */
    tx_tsync = __DECONST(uint64_t *, &txkring->ckr_sync_time);
    rx_tnote = __DECONST(uint64_t *, &rxkring->ckr_ring->ring_notify_time);
    *rx_tnote = atomic_add_64_ov(tx_tsync, 0);

    /*
     * If we have slots to pick up from the transmit side and we
     * have space available, perform an equivalent of "internal txsync".
     *
     * Acquire write access to the transmit (peer) ring and
     * serialize it, since another thread coming down for txsync
     * might add new slots.
     * If we fail to get the kring lock, then don't worry because
     * there's already a transmit sync in progress to move packets.
     */
    if (__probable(n != 0 && m != 0 && (flags & NA_SYNCF_MONITOR) == 0)) {
        (void) kr_enter(txkring, TRUE);
        n = nx_upipe_na_txsync_locked(txkring, p, flags, &ret, TRUE);
        kr_exit(txkring);
    } else {
        n = 0;
    }

    /*
     * If we have reclaimed some slots or transferred new slots
     * from the transmit side, notify the other end. Also notify
     * ourselves to pick up newly transferred ones, if any.
     */
    if (__probable(r != 0 || n != 0)) {
        SK_DF(SK_VERB_USER_PIPE | SK_VERB_SYNC | SK_VERB_RX,
            "%s(%d) kr \"%s\", kh %3u kt %3u | "
            "rh %3u rt %3u [rel %u new %u]",
            sk_proc_name_address(p), sk_proc_pid(p), rxkring->ckr_name,
            rxkring->ckr_khead, rxkring->ckr_ktail,
            rxkring->ckr_rhead, rxkring->ckr_rtail, r, n);

        (void) txkring->ckr_na_notify(txkring, p, 0);
    }

    return ret;
}

static int
nx_upipe_na_rings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
    struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
    struct nexus_adapter *ona = &pna->pna_peer->pna_up;
    int error = 0;
    enum txrx t;
    uint32_t i;

    /*
     * Create krings and all the rings for this end;
     * we'll update ckr_save_ring pointers below.
     */
    error = na_rings_mem_setup(na, FALSE, ch);
    if (error != 0) {
        goto err;
    }

    /* update our hidden ring pointers */
    for_rx_tx(t) {
        for (i = 0; i < na_get_nrings(na, t); i++) {
            NAKR(na, t)[i].ckr_save_ring =
                NAKR(na, t)[i].ckr_ring;
        }
    }

    /* now, create krings and rings of the other end */
    error = na_rings_mem_setup(ona, FALSE, ch);
    if (error != 0) {
        na_rings_mem_teardown(na, ch, FALSE);    /* this end */
        goto err;
    }

    for_rx_tx(t) {
        for (i = 0; i < na_get_nrings(ona, t); i++) {
            NAKR(ona, t)[i].ckr_save_ring =
                NAKR(ona, t)[i].ckr_ring;
        }
    }

    /* cross link the krings */
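    /*
     * After this loop, for each ring index i, one end's TX kring
     * and the other end's RX kring point at each other via
     * ckr_pipe:
     *
     *    e1 TX[i] <-> e2 RX[i]    e1 RX[i] <-> e2 TX[i]
     *
     * so a txsync on one end reaches its peer's receive kring
     * directly through ckr_pipe.
     */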
    for_rx_tx(t) {
        /* swap NR_TX <-> NR_RX (skip host ring) */
        enum txrx r = sk_txrx_swap(t);
        for (i = 0; i < na_get_nrings(na, t); i++) {
            NAKR(na, t)[i].ckr_pipe =
                NAKR(&pna->pna_peer->pna_up, r) + i;
            NAKR(&pna->pna_peer->pna_up, r)[i].ckr_pipe =
                NAKR(na, t) + i;
        }
    }
err:
    return error;
}

/*
 * Pipe endpoints are created and destroyed together, so that endpoints do not
 * have to check for the existence of their peer at each ?xsync.
 *
 * To play well with the existing nexus adapter infrastructure (refcounts etc.),
 * we adopt the following strategy:
 *
 * 1) The first endpoint that is created also creates the other endpoint and
 * grabs a reference to it.
 *
 *    state A)  user1 --> endpoint1 --> endpoint2
 *
 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
 * its reference to the user:
 *
 *    state B)  user1 --> endpoint1     endpoint2 <--- user2
 *
 * 3) Assume that, starting from state B, endpoint2 is closed. In the unregister
 * callback endpoint2 notes that endpoint1 is still active and adds a reference
 * from endpoint1 to itself. When user2 then releases her own reference,
 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
 * would be reached if endpoint1 were released instead.
 *
 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
 * it owns a reference to endpoint2 and releases it.
 *
 * Something similar goes on for the creation and destruction of the krings.
 */
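
/*
 * In short: whichever endpoint is not (or no longer) held by a user
 * is kept alive by a peer reference from the other endpoint, so the
 * two endpoints always live and die as a pair.
 */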

/*
 * nx_upipe_na_krings_create.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *    usr1 --> e1 --> e2
 *
 * and we are e1. We have to create both sets
 * of krings.
 *
 * 2) state is
 *
 *    usr1 --> e1 --> e2
 *
 * and we are e2. e1 is certainly registered and our
 * krings already exist, but they may be hidden.
 */
static int
nx_upipe_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
    struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
    int error = 0;
    enum txrx t;
    uint32_t i;

    /*
     * Verify symmetrical ring counts; validated
     * at nexus provider registration time.
     */
    ASSERT(na_get_nrings(na, NR_TX) == na_get_nrings(na, NR_RX));

    if (pna->pna_peer_ref) {
        /* case 1) above */
        SK_DF(SK_VERB_USER_PIPE,
            "0x%llx: case 1, create everything", SK_KVA(na));
        error = nx_upipe_na_rings_create(na, ch);
    } else {
        /* case 2) above */
        /* recover the hidden rings */
        SK_DF(SK_VERB_USER_PIPE,
            "0x%llx: case 2, hidden rings", SK_KVA(na));
        for_rx_tx(t) {
            for (i = 0; i < na_get_nrings(na, t); i++) {
                NAKR(na, t)[i].ckr_ring =
                    NAKR(na, t)[i].ckr_save_ring;
            }
        }
    }

    ASSERT(error == 0 || (na->na_tx_rings == NULL &&
        na->na_rx_rings == NULL && na->na_slot_ctxs == NULL));
    ASSERT(error == 0 || (pna->pna_peer->pna_up.na_tx_rings == NULL &&
        pna->pna_peer->pna_up.na_rx_rings == NULL &&
        pna->pna_peer->pna_up.na_slot_ctxs == NULL));

    return error;
}

/*
 * nx_upipe_na_activate.
 *
 * There are two cases on registration (onoff==1)
 *
 * 1.a) state is
 *
 *    usr1 --> e1 --> e2
 *
 * and we are e1. Nothing special to do.
 *
 * 1.b) state is
 *
 *    usr1 --> e1 --> e2 <-- usr2
 *
 * and we are e2. Drop the ref e1 is holding.
 *
 * There are two additional cases on unregister (onoff==0)
 *
 * 2.a) state is
 *
 *    usr1 --> e1 --> e2
 *
 * and we are e1. Nothing special to do, e2 will
 * be cleaned up by the destructor of e1.
 *
 * 2.b) state is
 *
 *    usr1 --> e1     e2 <-- usr2
 *
 * and we are either e1 or e2. Add a ref from the
 * other end and hide our rings.
 */
static int
nx_upipe_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
    struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;

    SK_LOCK_ASSERT_HELD();

    SK_DF(SK_VERB_USER_PIPE, "na \"%s\" (0x%llx) %s", na->na_name,
        SK_KVA(na), na_activate_mode2str(mode));

    switch (mode) {
    case NA_ACTIVATE_MODE_ON:
        atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
        break;

    case NA_ACTIVATE_MODE_DEFUNCT:
        break;

    case NA_ACTIVATE_MODE_OFF:
        atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
        break;

    default:
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
    }

    if (pna->pna_peer_ref) {
        SK_DF(SK_VERB_USER_PIPE,
            "0x%llx: case 1.a or 2.a, nothing to do", SK_KVA(na));
        return 0;
    }

    switch (mode) {
    case NA_ACTIVATE_MODE_ON:
        SK_DF(SK_VERB_USER_PIPE,
            "0x%llx: case 1.b, drop peer", SK_KVA(na));
        if (pna->pna_peer->pna_peer_ref) {
            pna->pna_peer->pna_peer_ref = FALSE;
            (void) na_release_locked(na);
        }
        break;

    case NA_ACTIVATE_MODE_OFF:
        SK_DF(SK_VERB_USER_PIPE,
            "0x%llx: case 2.b, grab peer", SK_KVA(na));
        if (!pna->pna_peer->pna_peer_ref) {
            na_retain_locked(na);
            pna->pna_peer->pna_peer_ref = TRUE;
        }
        break;

    default:
        break;
    }

    return 0;
}

/*
 * nx_upipe_na_krings_delete.
 *
 * There are two cases:
 *
 * 1) state is
 *
 *    usr1 --> e1 --> e2
 *
 * and we are e1 (e2 is not bound, so krings_delete cannot be
 * called on it);
 *
 * 2) state is
 *
 *    usr1 --> e1     e2 <-- usr2
 *
 * and we are either e1 or e2.
 *
 * In the former case we have to also delete the krings of e2;
 * in the latter case we do nothing (note that our krings
 * have already been hidden in the unregister callback).
 */
static void
nx_upipe_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
    struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
    struct nexus_adapter *ona;    /* na of the other end */
    uint32_t i;
    enum txrx t;

    SK_LOCK_ASSERT_HELD();

    if (!pna->pna_peer_ref) {
        SK_DF(SK_VERB_USER_PIPE,
            "0x%llx: case 2, kept alive by peer", SK_KVA(na));
        /*
         * If adapter is defunct (note the explicit test against
         * NAF_DEFUNCT, and not the "defunct" parameter passed in
         * by the caller), then the peer's channel has gone defunct.
         * We get here because this channel was not defuncted, and
         * this is the last active reference to the adapter.
         * At this point we tear everything down, since the caller
         * will proceed to destroying the memory regions.
         */
        if (na->na_flags & NAF_DEFUNCT) {
            na_rings_mem_teardown(na, ch, defunct);
        }
        return;
    }

    /* case 1) above */
    SK_DF(SK_VERB_USER_PIPE,
1381 "0x%llx: case 1, deleting everyhing", SK_KVA(na));

    ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));

    /* restore the ring to be deleted on the peer */
    ona = &pna->pna_peer->pna_up;
    if (ona->na_tx_rings == NULL) {
        /*
         * Already deleted; we must be on a cleanup-after-error
         * path. Just delete this end.
         */
        na_rings_mem_teardown(na, ch, defunct);
        return;
    }

    /* delete the memory rings */
    na_rings_mem_teardown(na, ch, defunct);

    if (!defunct) {
        for_rx_tx(t) {
            for (i = 0; i < na_get_nrings(ona, t); i++) {
                NAKR(ona, t)[i].ckr_ring =
                    NAKR(ona, t)[i].ckr_save_ring;
            }
        }
    }

    /* delete the memory rings of the peer */
    na_rings_mem_teardown(ona, ch, defunct);
}

static void
nx_upipe_na_dtor(struct nexus_adapter *na)
{
    struct nexus_upipe_adapter *pna = (struct nexus_upipe_adapter *)na;
    struct nx_upipe *u = NX_UPIPE_PRIVATE(na->na_nx);

    SK_LOCK_ASSERT_HELD();

    SK_DF(SK_VERB_USER_PIPE, "0x%llx", SK_KVA(na));
    if (pna->pna_peer_ref) {
        SK_DF(SK_VERB_USER_PIPE,
            "0x%llx: clean up peer 0x%llx", SK_KVA(na),
            SK_KVA(&pna->pna_peer->pna_up));
        pna->pna_peer_ref = FALSE;
        (void) na_release_locked(&pna->pna_peer->pna_up);
    }
    if (pna->pna_role == CH_ENDPOINT_USER_PIPE_MASTER) {
        nx_upipe_na_remove(pna->pna_parent, pna);
    }
    (void) na_release_locked(pna->pna_parent);
    pna->pna_parent = NULL;

    /* release reference to parent adapter held by nx_upipe_na_find() */
    ASSERT(u->nup_pna_users != 0);
    if (--u->nup_pna_users == 0) {
        ASSERT(u->nup_pna != NULL);
        SK_DF(SK_VERB_USER_PIPE, "release parent: \"%s\" (0x%llx)",
            u->nup_pna->na_name, SK_KVA(u->nup_pna));
        na_release_locked(u->nup_pna);
        u->nup_pna = NULL;
    }
}

int
nx_upipe_na_find(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct nxbind *nxb, struct proc *p,
    struct nexus_adapter **na, boolean_t create)
{
#pragma unused(ch, p)
    struct nx_upipe *u = NX_UPIPE_PRIVATE(nx);
    struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
    struct nexus_adapter *pna = NULL;    /* parent adapter */
    boolean_t anon = NX_ANONYMOUS_PROV(nx);
    struct nexus_upipe_adapter *mna, *sna, *req;
    ch_endpoint_t ep = chr->cr_endpoint;
    uint32_t pipe_id;
    int error;

    SK_LOCK_ASSERT_HELD();
    *na = NULL;

#if SK_LOG
    uuid_string_t uuidstr;
    SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
        "ring_id %d ring_set %u ep_type %u:%u create %u%s",
        chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
        (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
        chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
        chr->cr_real_endpoint, chr->cr_endpoint, create,
        (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
        ep != CH_ENDPOINT_USER_PIPE_SLAVE) ? " (skipped)" : "");
#endif /* SK_LOG */

    if (ep != CH_ENDPOINT_USER_PIPE_MASTER &&
        ep != CH_ENDPOINT_USER_PIPE_SLAVE) {
        return 0;
    }

    /*
     * Check client credentials.
     */
    if (chr->cr_port == NEXUS_PORT_USER_PIPE_SERVER) {
        if (!anon && (u->nup_srv_nxb == NULL || nxb == NULL ||
            !nxb_is_equal(u->nup_srv_nxb, nxb))) {
            return EACCES;
        }
    } else {
        ASSERT(chr->cr_port == NEXUS_PORT_USER_PIPE_CLIENT);
        if (!anon && (u->nup_cli_nxb == NULL || nxb == NULL ||
            !nxb_is_equal(u->nup_cli_nxb, nxb))) {
            return EACCES;
        }
    }

    /*
     * First, try to find a previously-created parent adapter
     * for this nexus; else, create one and store it in the
     * nexus. We'll release this at nexus destructor time.
     */
    if ((pna = u->nup_pna) != NULL) {
        na_retain_locked(pna);    /* for us */
        SK_DF(SK_VERB_USER_PIPE, "found parent: \"%s\" (0x%llx)",
            pna->na_name, SK_KVA(pna));
    } else {
        /* callee will hold a reference for us upon success */
        error = na_pseudo_create(nx, chr, &pna);
        if (error != 0) {
            SK_ERR("parent create failed: %d", error);
            return error;
        }
        /* hold an extra reference for nx_upipe */
        u->nup_pna = pna;
        na_retain_locked(pna);
        SK_DF(SK_VERB_USER_PIPE, "created parent: \"%s\" (0x%llx)",
            pna->na_name, SK_KVA(pna));
    }

    /* next, lookup the pipe id in the parent list */
    req = NULL;
    pipe_id = chr->cr_pipe_id;
    mna = nx_upipe_find(pna, pipe_id);
    if (mna != NULL) {
        if (mna->pna_role == ep) {
            SK_DF(SK_VERB_USER_PIPE,
                "found pipe_id %u directly at slot %u",
                pipe_id, mna->pna_parent_slot);
            req = mna;
        } else {
            SK_DF(SK_VERB_USER_PIPE,
                "found pipe_id %u indirectly at slot %u",
                pipe_id, mna->pna_parent_slot);
            req = mna->pna_peer;
        }
        /*
         * The pipe we have found already holds a ref to the parent,
         * so we need to drop the one we got from above.
         */
        (void) na_release_locked(pna);
        goto found;
    }
    SK_DF(SK_VERB_USER_PIPE,
        "pipe_id %u not found, create %u", pipe_id, create);
    if (!create) {
        error = ENODEV;
        goto put_out;
    }
    /*
     * We create both master and slave.
     * The endpoint we were asked for holds a reference to
     * the other one.
     */
    mna = na_upipe_alloc(Z_WAITOK);

    ASSERT(mna->pna_up.na_type == NA_USER_PIPE);
    ASSERT(mna->pna_up.na_free == na_upipe_free);

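    /*
     * Endpoint names derive from the parent adapter's name, with
     * '{' marking the master and '}' (below) the slave, followed
     * by the pipe id.
     */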
    (void) snprintf(mna->pna_up.na_name, sizeof(mna->pna_up.na_name),
        "%s{%u", pna->na_name, pipe_id);
    uuid_generate_random(mna->pna_up.na_uuid);

    mna->pna_id = pipe_id;
    mna->pna_role = CH_ENDPOINT_USER_PIPE_MASTER;
    mna->pna_parent = pna;
    mna->pna_up.na_txsync = nx_upipe_na_txsync;
    mna->pna_up.na_rxsync = nx_upipe_na_rxsync;
    mna->pna_up.na_activate = nx_upipe_na_activate;
    mna->pna_up.na_dtor = nx_upipe_na_dtor;
    mna->pna_up.na_krings_create = nx_upipe_na_krings_create;
    mna->pna_up.na_krings_delete = nx_upipe_na_krings_delete;
    mna->pna_up.na_arena = pna->na_arena;
    skmem_arena_retain((&mna->pna_up)->na_arena);
    atomic_bitset_32(&mna->pna_up.na_flags, NAF_MEM_LOANED);
    *(nexus_meta_type_t *)(uintptr_t)&mna->pna_up.na_md_type =
        pna->na_md_type;
    *(nexus_meta_subtype_t *)(uintptr_t)&mna->pna_up.na_md_subtype =
        pna->na_md_subtype;

    *(nexus_stats_type_t *)(uintptr_t)&mna->pna_up.na_stats_type =
        NEXUS_STATS_TYPE_INVALID;
    *(uint32_t *)(uintptr_t)&mna->pna_up.na_flowadv_max =
        nxp->nxp_flowadv_max;
    ASSERT(mna->pna_up.na_flowadv_max == 0 ||
        skmem_arena_nexus(mna->pna_up.na_arena)->arn_flowadv_obj != NULL);

    /*
     * Parent adapter parameters must match the nexus provider's by the
     * time we get here, since na_find() above shouldn't return
     * one otherwise.
     */
    na_set_nrings(&mna->pna_up, NR_TX, nxp->nxp_tx_rings);
    na_set_nrings(&mna->pna_up, NR_RX, nxp->nxp_rx_rings);
    na_set_nslots(&mna->pna_up, NR_TX, nxp->nxp_tx_slots);
    na_set_nslots(&mna->pna_up, NR_RX, nxp->nxp_rx_slots);
    ASSERT(na_get_nrings(&mna->pna_up, NR_TX) == na_get_nrings(pna, NR_TX));
    ASSERT(na_get_nrings(&mna->pna_up, NR_RX) == na_get_nrings(pna, NR_RX));
    ASSERT(na_get_nslots(&mna->pna_up, NR_TX) == na_get_nslots(pna, NR_TX));
    ASSERT(na_get_nslots(&mna->pna_up, NR_RX) == na_get_nslots(pna, NR_RX));

    na_attach_common(&mna->pna_up, nx, &nx_upipe_prov_s);

    /* register the master with the parent */
    error = nx_upipe_na_add(pna, mna);
    if (error != 0) {
        goto free_mna;
    }

    /* create the slave */
    sna = na_upipe_alloc(Z_WAITOK);

    /* most fields are the same, copy from master and then fix */
    bcopy(mna, sna, sizeof(*sna));
    skmem_arena_retain((&sna->pna_up)->na_arena);
    atomic_bitset_32(&sna->pna_up.na_flags, NAF_MEM_LOANED);

    ASSERT(sna->pna_up.na_type == NA_USER_PIPE);
    ASSERT(sna->pna_up.na_free == na_upipe_free);

    (void) snprintf(sna->pna_up.na_name, sizeof(sna->pna_up.na_name),
1621 "%s}%d", pna->na_name, pipe_id);
    uuid_generate_random(sna->pna_up.na_uuid);

    sna->pna_role = CH_ENDPOINT_USER_PIPE_SLAVE;
    na_attach_common(&sna->pna_up, nx, &nx_upipe_prov_s);

    /* join the two endpoints */
    mna->pna_peer = sna;
    sna->pna_peer = mna;

    /*
     * We already have a reference to the parent, but we
     * need another one for the other endpoint we created
     */
    na_retain_locked(pna);

    if ((chr->cr_mode & CHMODE_DEFUNCT_OK) != 0) {
        atomic_bitset_32(&pna->na_flags, NAF_DEFUNCT_OK);
    }

    if (ep == CH_ENDPOINT_USER_PIPE_MASTER) {
        req = mna;
        mna->pna_peer_ref = TRUE;
        na_retain_locked(&sna->pna_up);
    } else {
        req = sna;
        sna->pna_peer_ref = TRUE;
        na_retain_locked(&mna->pna_up);
    }

    /* parent adapter now has two users (mna and sna) */
    u->nup_pna_users += 2;

#if SK_LOG
    SK_DF(SK_VERB_USER_PIPE, "created master 0x%llx and slave 0x%llx",
        SK_KVA(mna), SK_KVA(sna));
    SK_DF(SK_VERB_USER_PIPE, "mna: \"%s\"", mna->pna_up.na_name);
    SK_DF(SK_VERB_USER_PIPE, " UUID: %s",
        sk_uuid_unparse(mna->pna_up.na_uuid, uuidstr));
    SK_DF(SK_VERB_USER_PIPE, " nx: 0x%llx (\"%s\":\"%s\")",
        SK_KVA(mna->pna_up.na_nx), NX_DOM(mna->pna_up.na_nx)->nxdom_name,
        NX_DOM_PROV(mna->pna_up.na_nx)->nxdom_prov_name);
    SK_DF(SK_VERB_USER_PIPE, " flags: 0x%b",
        mna->pna_up.na_flags, NAF_BITS);
    SK_DF(SK_VERB_USER_PIPE, " flowadv_max: %u",
        mna->pna_up.na_flowadv_max);
    SK_DF(SK_VERB_USER_PIPE, " rings: tx %u rx %u",
        na_get_nrings(&mna->pna_up, NR_TX),
        na_get_nrings(&mna->pna_up, NR_RX));
    SK_DF(SK_VERB_USER_PIPE, " slots: tx %u rx %u",
        na_get_nslots(&mna->pna_up, NR_TX),
        na_get_nslots(&mna->pna_up, NR_RX));
    SK_DF(SK_VERB_USER_PIPE, " next_pipe: %u", mna->pna_up.na_next_pipe);
    SK_DF(SK_VERB_USER_PIPE, " max_pipes: %u", mna->pna_up.na_max_pipes);
    SK_DF(SK_VERB_USER_PIPE, " parent: \"%s\"",
        mna->pna_parent->na_name);
    SK_DF(SK_VERB_USER_PIPE, " id: %u", mna->pna_id);
    SK_DF(SK_VERB_USER_PIPE, " role: %u", mna->pna_role);
    SK_DF(SK_VERB_USER_PIPE, " peer_ref: %u", mna->pna_peer_ref);
    SK_DF(SK_VERB_USER_PIPE, " parent_slot: %u", mna->pna_parent_slot);
    SK_DF(SK_VERB_USER_PIPE, "sna: \"%s\"", sna->pna_up.na_name);
    SK_DF(SK_VERB_USER_PIPE, " UUID: %s",
        sk_uuid_unparse(sna->pna_up.na_uuid, uuidstr));
    SK_DF(SK_VERB_USER_PIPE, " nx: 0x%llx (\"%s\":\"%s\")",
        SK_KVA(sna->pna_up.na_nx), NX_DOM(sna->pna_up.na_nx)->nxdom_name,
        NX_DOM_PROV(sna->pna_up.na_nx)->nxdom_prov_name);
    SK_DF(SK_VERB_USER_PIPE, " flags: 0x%b",
        sna->pna_up.na_flags, NAF_BITS);
    SK_DF(SK_VERB_USER_PIPE, " flowadv_max: %u",
        sna->pna_up.na_flowadv_max);
    SK_DF(SK_VERB_USER_PIPE, " rings: tx %u rx %u",
        na_get_nrings(&sna->pna_up, NR_TX),
        na_get_nrings(&sna->pna_up, NR_RX));
    SK_DF(SK_VERB_USER_PIPE, " slots: tx %u rx %u",
        na_get_nslots(&sna->pna_up, NR_TX),
        na_get_nslots(&sna->pna_up, NR_RX));
    SK_DF(SK_VERB_USER_PIPE, " next_pipe: %u", sna->pna_up.na_next_pipe);
    SK_DF(SK_VERB_USER_PIPE, " max_pipes: %u", sna->pna_up.na_max_pipes);
    SK_DF(SK_VERB_USER_PIPE, " parent: \"%s\"",
        sna->pna_parent->na_name);
    SK_DF(SK_VERB_USER_PIPE, " id: %u", sna->pna_id);
    SK_DF(SK_VERB_USER_PIPE, " role: %u", sna->pna_role);
    SK_DF(SK_VERB_USER_PIPE, " peer_ref: %u", sna->pna_peer_ref);
    SK_DF(SK_VERB_USER_PIPE, " parent_slot: %u", sna->pna_parent_slot);
#endif /* SK_LOG */

found:

    SK_DF(SK_VERB_USER_PIPE, "pipe_id %u role %s at 0x%llx", pipe_id,
        (req->pna_role == CH_ENDPOINT_USER_PIPE_MASTER ?
        "master" : "slave"), SK_KVA(req));
    if ((chr->cr_mode & CHMODE_DEFUNCT_OK) == 0) {
        atomic_bitclear_32(&pna->na_flags, NAF_DEFUNCT_OK);
    }
    *na = &req->pna_up;
    na_retain_locked(*na);

    /*
     * Keep the reference to the parent; it will be released
     * by the adapter's destructor.
     */
    return 0;

free_mna:
    if (mna->pna_up.na_arena != NULL) {
        skmem_arena_release((&mna->pna_up)->na_arena);
        mna->pna_up.na_arena = NULL;
    }
    NA_FREE(&mna->pna_up);
put_out:
    (void) na_release_locked(pna);
    return error;
}

static struct nx_upipe *
nx_upipe_alloc(zalloc_flags_t how)
{
    struct nx_upipe *u;

    SK_LOCK_ASSERT_HELD();

    u = zalloc_flags(nx_upipe_zone, how | Z_ZERO);
    if (u) {
        SK_DF(SK_VERB_MEM, "upipe 0x%llx ALLOC", SK_KVA(u));
    }
    return u;
}

static void
nx_upipe_free(struct nx_upipe *u)
{
    ASSERT(u->nup_pna == NULL);
    ASSERT(u->nup_pna_users == 0);
    ASSERT(u->nup_cli_nxb == NULL);
    ASSERT(u->nup_srv_nxb == NULL);

    SK_DF(SK_VERB_MEM, "upipe 0x%llx FREE", SK_KVA(u));
    zfree(nx_upipe_zone, u);
}