xref: /xnu-8019.80.24/bsd/skywalk/nexus/monitor/nx_monitor.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  *   1. Redistributions of source code must retain the above copyright
36  *      notice, this list of conditions and the following disclaimer.
37  *   2. Redistributions in binary form must reproduce the above copyright
38  *      notice, this list of conditions and the following disclaimer in the
39  *      documentation and/or other materials provided with the distribution.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  */
53 
54 /*
55  * $FreeBSD$
56  *
57  * Monitors
58  *
59  * netmap monitors can be used to do monitoring of network traffic
60  * on another adapter, when the latter adapter is working in netmap mode.
61  *
62  * Monitors offer to userspace the same interface as any other netmap port,
63  * with as many pairs of netmap rings as the monitored adapter.
64  * However, only the rx rings are actually used. Each monitor rx ring receives
65  * the traffic transiting on both the tx and rx corresponding rings in the
66  * monitored adapter. During registration, the user can choose if she wants
67  * to intercept tx only, rx only, or both tx and rx traffic.
68  *
69  * If the monitor is not able to cope with the stream of frames, excess traffic
70  * will be dropped.
71  *
72  * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
73  *
74  * Monitors can be either zero-copy or copy-based.
75  *
76  * Copy monitors see the frames before they are consumed:
77  *
78  *  - For tx traffic, this is when the application sends them, before they are
79  *    passed down to the adapter.
80  *
81  *  - For rx traffic, this is when they are received by the adapter, before
82  *    they are sent up to the application, if any (note that, if no
83  *    application is reading from a monitored ring, the ring will eventually
84  *    fill up and traffic will stop).
85  *
86  * Zero-copy monitors only see the frames after they have been consumed:
87  *
88  *  - For tx traffic, this is after the slots containing the frames have been
 89  *    marked as free. Note that this may happen at a considerable delay after
90  *    frame transmission, since freeing of slots is often done lazily.
91  *
92  *  - For rx traffic, this is after the consumer on the monitored adapter
93  *    has released them. In most cases, the consumer is a userspace
94  *    application which may have modified the frame contents.
95  *
96  * Several copy monitors may be active on any ring.  Zero-copy monitors,
97  * instead, need exclusive access to each of the monitored rings.  This may
98  * change in the future, if we implement zero-copy monitor chaining.
99  *
100  */
101 
102 #include <skywalk/os_skywalk_private.h>
103 #include <skywalk/nexus/monitor/nx_monitor.h>
104 
105 static int nx_mon_na_txsync(struct __kern_channel_ring *, struct proc *,
106     uint32_t);
107 static int nx_mon_na_rxsync(struct __kern_channel_ring *, struct proc *,
108     uint32_t);
109 static int nx_mon_na_krings_create(struct nexus_adapter *,
110     struct kern_channel *);
111 static void nx_mon_na_krings_delete(struct nexus_adapter *,
112     struct kern_channel *, boolean_t);
113 static uint32_t nx_mon_txrx2chmode(enum txrx);
114 static int nx_mon_kr_alloc(struct __kern_channel_ring *, uint32_t);
115 static void nx_mon_kr_dealloc(struct __kern_channel_ring *);
116 static int nx_mon_na_krings_locks(struct nexus_adapter *,
117     uint32_t[NR_TXRX], uint32_t[NR_TXRX]);
118 static void nx_mon_na_krings_unlock(struct nexus_adapter *,
119     const uint32_t[NR_TXRX], const uint32_t[NR_TXRX]);
120 static int nx_mon_enable(struct nexus_adapter *, int);
121 static void nx_mon_disable(struct nexus_adapter *);
122 static int nx_mon_add(struct __kern_channel_ring *,
123     struct __kern_channel_ring *, boolean_t);
124 static void nx_mon_del(struct __kern_channel_ring *,
125     struct __kern_channel_ring *, boolean_t);
126 static int nx_mon_na_activate_common(struct nexus_adapter *,
127     na_activate_mode_t, boolean_t);
128 static pkt_copy_from_pkt_t nx_mon_quantum_copy_64x;
129 
130 static int nx_mon_zcopy_parent_sync(struct __kern_channel_ring *,
131     struct proc *, uint32_t, enum txrx);
132 static int nx_mon_zcopy_na_activate(struct nexus_adapter *, na_activate_mode_t);
133 static void nx_mon_zcopy_na_dtor(struct nexus_adapter *);
134 
135 static void nx_mon_parent_sync(struct __kern_channel_ring *, struct proc *,
136     slot_idx_t, int);
137 static int nx_mon_na_activate(struct nexus_adapter *, na_activate_mode_t);
138 static void nx_mon_na_dtor(struct nexus_adapter *);
139 
140 /*
141  * monitors work by replacing the nm_sync() and possibly the
142  * nm_notify() callbacks in the monitored rings.
143  */
144 static int nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *,
145     struct proc *, uint32_t);
146 static int nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *,
147     struct proc *, uint32_t);
148 static int nx_mon_parent_txsync(struct __kern_channel_ring *,
149     struct proc *, uint32_t);
150 static int nx_mon_parent_rxsync(struct __kern_channel_ring *,
151     struct proc *, uint32_t);
152 static int nx_mon_parent_notify(struct __kern_channel_ring *,
153     struct proc *, uint32_t);
154 
155 static void nx_mon_dom_init(struct nxdom *);
156 static void nx_mon_dom_terminate(struct nxdom *);
157 static void nx_mon_dom_fini(struct nxdom *);
158 static int nx_mon_dom_bind_port(struct kern_nexus *, nexus_port_t *,
159     struct nxbind *, void *);
160 static int nx_mon_dom_unbind_port(struct kern_nexus *, nexus_port_t);
161 static int nx_mon_dom_connect(struct kern_nexus_domain_provider *,
162     struct kern_nexus *, struct kern_channel *, struct chreq *,
163     struct kern_channel *, struct nxbind *, struct proc *);
164 static void nx_mon_dom_disconnect(struct kern_nexus_domain_provider *,
165     struct kern_nexus *, struct kern_channel *);
166 static void nx_mon_dom_defunct(struct kern_nexus_domain_provider *,
167     struct kern_nexus *, struct kern_channel *, struct proc *);
168 static void nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *,
169     struct kern_nexus *, struct kern_channel *, boolean_t);
170 
171 static int nx_mon_prov_init(struct kern_nexus_domain_provider *);
172 static int nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *,
173     const struct nxprov_params *, struct nxprov_adjusted_params *);
174 static int nx_mon_prov_params(struct kern_nexus_domain_provider *,
175     const uint32_t, const struct nxprov_params *, struct nxprov_params *,
176     struct skmem_region_params[SKMEM_REGIONS]);
177 static int nx_mon_prov_mem_new(struct kern_nexus_domain_provider *,
178     struct kern_nexus *, struct nexus_adapter *);
179 static void nx_mon_prov_fini(struct kern_nexus_domain_provider *);
180 
181 static struct nexus_monitor_adapter *na_mon_alloc(zalloc_flags_t);
182 static void na_mon_free(struct nexus_adapter *);
183 
/*
 * Nexus domain descriptor for the monitor nexus type (quantum metadata,
 * payload subtype).  The default provider (nx_monitor_prov_s) is attached
 * to this domain in nx_mon_dom_init().
 */
struct nxdom nx_monitor_dom_s = {
	.nxdom_prov_head =
    STAILQ_HEAD_INITIALIZER(nx_monitor_dom_s.nxdom_prov_head),
	.nxdom_type =           NEXUS_TYPE_MONITOR,
	.nxdom_md_type =        NEXUS_META_TYPE_QUANTUM,
	.nxdom_md_subtype =     NEXUS_META_SUBTYPE_PAYLOAD,
	.nxdom_name =           "monitor",
	/*
	 * The following values don't really matter much, as a monitor
	 * isn't usable on its own; we just define them as non-zeroes.
	 */
	.nxdom_ports =          {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_tx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_rx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_tx_slots = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_rx_slots = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_buf_size = {
		.nb_def = 64,
		.nb_min = 64,
		.nb_max = 64,
	},
	.nxdom_meta_size = {
		.nb_def = NX_METADATA_OBJ_MIN_SZ,
		.nb_min = NX_METADATA_OBJ_MIN_SZ,
		.nb_max = NX_METADATA_USR_MAX_SZ,
	},
	.nxdom_stats_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_STATS_MAX_SZ,
	},
	.nxdom_pipes = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = 0,
	},
	.nxdom_flowadv_max = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_FLOWADV_MAX,
	},
	.nxdom_nexusadv_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_NEXUSADV_MAX_SZ,
	},
	.nxdom_capabilities = {
		.nb_def = NXPCAP_USER_CHANNEL,
		.nb_min = NXPCAP_USER_CHANNEL,
		.nb_max = NXPCAP_USER_CHANNEL,
	},
	.nxdom_qmap = {
		.nb_def = NEXUS_QMAP_TYPE_INVALID,
		.nb_min = NEXUS_QMAP_TYPE_INVALID,
		.nb_max = NEXUS_QMAP_TYPE_INVALID,
	},
	.nxdom_max_frags = {
		.nb_def = NX_PBUF_FRAGS_DEFAULT,
		.nb_min = NX_PBUF_FRAGS_MIN,
		.nb_max = NX_PBUF_FRAGS_DEFAULT,
	},
	.nxdom_init =           nx_mon_dom_init,
	.nxdom_terminate =      nx_mon_dom_terminate,
	.nxdom_fini =           nx_mon_dom_fini,
	.nxdom_find_port =      NULL,
	.nxdom_port_is_reserved = NULL,
	.nxdom_bind_port =      nx_mon_dom_bind_port,
	.nxdom_unbind_port =    nx_mon_dom_unbind_port,
	.nxdom_connect =        nx_mon_dom_connect,
	.nxdom_disconnect =     nx_mon_dom_disconnect,
	.nxdom_defunct =        nx_mon_dom_defunct,
	.nxdom_defunct_finalize = nx_mon_dom_defunct_finalize,
};
277 
/*
 * Default domain provider for the monitor nexus.  Only the init/fini,
 * params and mem_new callbacks are implemented; there is no per-nexus
 * constructor/destructor, config, mem_info or MIB support.
 */
static struct kern_nexus_domain_provider nx_monitor_prov_s = {
	.nxdom_prov_name =              NEXUS_PROVIDER_MONITOR,
	.nxdom_prov_flags =             NXDOMPROVF_DEFAULT,
	.nxdom_prov_cb = {
		.dp_cb_init =           nx_mon_prov_init,
		.dp_cb_fini =           nx_mon_prov_fini,
		.dp_cb_params =         nx_mon_prov_params,
		.dp_cb_mem_new =        nx_mon_prov_mem_new,
		.dp_cb_config =         NULL,
		.dp_cb_nx_ctor =        NULL,
		.dp_cb_nx_dtor =        NULL,
		.dp_cb_nx_mem_info =    NULL,           /* not supported */
		.dp_cb_nx_mib_get =     NULL,
	},
};
293 
/* zone for struct nexus_monitor_adapter; memory is cleared on free */
static ZONE_DECLARE(na_mon_zone, SKMEM_ZONE_PREFIX ".na.mon",
    sizeof(struct nexus_monitor_adapter), ZC_ZFREE_CLEARMEM);

/*
 * Allocation tag for the per-kring monitor arrays (see nx_mon_kr_alloc);
 * created in nx_mon_dom_init() and released in nx_mon_dom_terminate().
 */
#define SKMEM_TAG_MONITORS      "com.apple.skywalk.monitors"
static kern_allocation_name_t skmem_tag_monitors;
299 
/*
 * nxdom_init callback: create the monitor allocation tag and register
 * the default monitor provider with the domain.  Runs once with the
 * SK lock held, before the domain is marked initialized.
 */
static void
nx_mon_dom_init(struct nxdom *nxdom)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));

	/* tag must not already exist; init is expected to run only once */
	ASSERT(skmem_tag_monitors == NULL);
	skmem_tag_monitors =
	    kern_allocation_name_allocate(SKMEM_TAG_MONITORS, 0);
	ASSERT(skmem_tag_monitors != NULL);

	(void) nxdom_prov_add(nxdom, &nx_monitor_prov_s);
}
313 
/*
 * nxdom_terminate callback: detach every provider registered with the
 * domain, then release the monitor allocation tag created at init.
 */
static void
nx_mon_dom_terminate(struct nxdom *nxdom)
{
	struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;

	/* safe variant: nxdom_prov_del() unlinks the current element */
	STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
	    nxdom_prov_link, tnxdp) {
		(void) nxdom_prov_del(nxdom_prov);
	}

	if (skmem_tag_monitors != NULL) {
		kern_allocation_name_release(skmem_tag_monitors);
		skmem_tag_monitors = NULL;
	}
}
329 
/* nxdom_fini callback: nothing to tear down for the monitor domain */
static void
nx_mon_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
}
335 
/*
 * nxdom_bind_port callback: must never be invoked for a monitor nexus
 * (monitors are not usable on their own); VERIFY(0) panics if it is.
 */
__attribute__((noreturn))
static int
nx_mon_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
    struct nxbind *nxb, void *info)
{
#pragma unused(nx, nx_port, nxb, info)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
346 
/*
 * nxdom_unbind_port callback: must never be invoked for a monitor
 * nexus; VERIFY(0) panics if it is.
 */
__attribute__((noreturn))
static int
nx_mon_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
{
#pragma unused(nx, nx_port)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
356 
/*
 * nxdom_connect callback: channels are never opened directly against a
 * monitor nexus through this path; VERIFY(0) panics if it is reached.
 */
__attribute__((noreturn))
static int
nx_mon_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, chr, ch0, nxb, p)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
368 
/*
 * nxdom_disconnect callback: counterpart of nx_mon_dom_connect(); must
 * never be reached, and VERIFY(0) panics if it is.
 */
__attribute__((noreturn))
static void
nx_mon_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nxdom_prov, nx, ch)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
379 
/* nxdom_defunct callback: nothing to do for a monitor channel */
static void
nx_mon_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, p)
}
386 
/* nxdom_defunct_finalize callback: nothing to finalize for a monitor */
static void
nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
{
#pragma unused(nxdom_prov, nx, ch, locked)
}
393 
/*
 * dp_cb_init callback: log provider initialization; no state to set up.
 * NOTE(review): nxdom_prov is only referenced via SK_D(), which
 * presumably compiles away in non-debug builds — hence the unused
 * pragma alongside the use.
 */
static int
nx_mon_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
	return 0;
}
401 
/*
 * Adjust hook passed to nxprov_params_adjust(); the monitor domain
 * imposes no additional constraints, so this always succeeds.
 */
static int
nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
    const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
{
#pragma unused(nxdom_prov, nxp, adj)

	return 0;
}
410 
411 static int
nx_mon_prov_params(struct kern_nexus_domain_provider * nxdom_prov,const uint32_t req,const struct nxprov_params * nxp0,struct nxprov_params * nxp,struct skmem_region_params srp[SKMEM_REGIONS])412 nx_mon_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
413     const uint32_t req, const struct nxprov_params *nxp0,
414     struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS])
415 {
416 	struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
417 
418 	return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
419 	           nxdom, nxdom, nxdom, nx_mon_prov_params_adjust);
420 }
421 
/*
 * dp_cb_mem_new callback: create the memory arena for a monitor
 * adapter from the provider's region parameters.  Returns 0 on
 * success, or a non-zero error from skmem_arena_create_for_nexus().
 */
static int
nx_mon_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct nexus_adapter *na)
{
#pragma unused(nxdom_prov)
	int err = 0;

	SK_DF(SK_VERB_MONITOR,
	    "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
	    NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
	    SK_KVA(na));

	ASSERT(na->na_arena == NULL);
	ASSERT(NX_USER_CHANNEL_PROV(nx));
	/*
	 * The underlying nexus adapter uses the same memory allocator
	 * as the monitored adapter; don't store the pp in the nexus.
	 *
	 * This means that clients calling kern_nexus_get_pbufpool()
	 * will get NULL, but this is fine since we don't expose the
	 * monitor to external kernel clients.
	 */
	na->na_arena = skmem_arena_create_for_nexus(na,
	    NX_PROV(nx)->nxprov_region_params, NULL, NULL, FALSE,
	    FALSE, NULL, &err);
	/* either an arena was created, or err explains why not */
	ASSERT(na->na_arena != NULL || err != 0);

	return err;
}
451 
/*
 * dp_cb_fini callback: log provider teardown; no state to release.
 * (The unused pragma covers builds where SK_D compiles away.)
 */
static void
nx_mon_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
}
458 
459 static struct nexus_monitor_adapter *
na_mon_alloc(zalloc_flags_t how)460 na_mon_alloc(zalloc_flags_t how)
461 {
462 	struct nexus_monitor_adapter *mna;
463 
464 	_CASSERT(offsetof(struct nexus_monitor_adapter, mna_up) == 0);
465 
466 	mna = zalloc_flags(na_mon_zone, how | Z_ZERO);
467 	if (mna) {
468 		mna->mna_up.na_type = NA_MONITOR;
469 		mna->mna_up.na_free = na_mon_free;
470 	}
471 	return mna;
472 }
473 
/*
 * na_free callback: return a monitor adapter to its zone.  The adapter
 * must have dropped its last reference; the structure is scrubbed
 * before being freed.
 */
static void
na_mon_free(struct nexus_adapter *na)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;

	ASSERT(mna->mna_up.na_refcount == 0);
	SK_DF(SK_VERB_MEM, "mna 0x%llx FREE", SK_KVA(mna));
	bzero(mna, sizeof(*mna));
	zfree(na_mon_zone, mna);
}
484 
485 /*
486  * Functions common to both kind of monitors.
487  */
488 
/*
 * nm_sync callback for the monitor's own tx rings.
 * This makes no sense and always returns error (EIO): a monitor only
 * ever receives traffic; its tx rings are never usable.
 */
static int
nx_mon_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
	SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);
	return EIO;
}
505 
/*
 * nm_sync callback for the monitor's own rx rings.
 * Note that the lock in nx_mon_zcopy_parent_sync only protects
 * writers among themselves. Synchronization between writers
 * (i.e., nx_mon_zcopy_parent_txsync and nx_mon_zcopy_parent_rxsync)
 * and the reader (i.e., this routine) relies on memory barriers.
 */
static int
nx_mon_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(p, flags)
	SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);
	/* release the slots the user has consumed, up to rhead */
	kring->ckr_khead = kring->ckr_rhead;
	/* make the head update visible before writers reuse the slots */
	membar_sync();
	return 0;
}
527 
/*
 * na_krings_create callbacks for monitors.
 * We could use the default netmap_hw_krings_zmon, but
 * we don't need the nx_mbq.
 */
static int
nx_mon_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	ASSERT(na->na_type == NA_MONITOR);
	return na_rings_mem_setup(na, 0, FALSE, ch);
}
539 
/* na_krings_delete callback for monitors: undo na_rings_mem_setup() */
static void
nx_mon_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	ASSERT(na->na_type == NA_MONITOR);
	na_rings_mem_teardown(na, ch, defunct);
}
548 
549 __attribute__((always_inline))
550 static inline uint32_t
nx_mon_txrx2chmode(enum txrx t)551 nx_mon_txrx2chmode(enum txrx t)
552 {
553 	return t == NR_RX ? CHMODE_MONITOR_RX : CHMODE_MONITOR_TX;
554 }
555 
/*
 * Allocate (or grow) the monitors array in the monitored kring so it
 * can hold at least n entries.  Returns 0 on success or if the array
 * is already large enough; ENOMEM if the reallocation fails (the old
 * array is left intact in that case).
 */
static int
nx_mon_kr_alloc(struct __kern_channel_ring *kring, uint32_t n)
{
	struct __kern_channel_ring **nm;
	size_t len, oldlen;

	if (n <= kring->ckr_max_monitors) {
		/* we already have at least as many entries as requested */
		return 0;
	}

	oldlen = sizeof(struct __kern_channel_ring *) * kring->ckr_max_monitors;
	len = sizeof(struct __kern_channel_ring *) * n;
	nm = sk_realloc(kring->ckr_monitors, oldlen, len, Z_WAITOK, skmem_tag_monitors);
	if (nm == NULL) {
		return ENOMEM;
	}

	kring->ckr_monitors = nm;
	kring->ckr_max_monitors = n;

	return 0;
}
580 
/* deallocate the monitors array in the monitored kring */
static void
nx_mon_kr_dealloc(struct __kern_channel_ring *kring)
{
	if (kring->ckr_monitors != NULL) {
		/* should only be freed once every monitor has detached */
		if (kring->ckr_n_monitors > 0) {
			SK_ERR("freeing not empty monitor array for \"%s\" "
			    "(%u dangling monitors)!", kring->ckr_name,
			    kring->ckr_n_monitors);
		}
		sk_free(kring->ckr_monitors,
		    sizeof(struct __kern_channel_ring *) * kring->ckr_max_monitors);
		kring->ckr_monitors = NULL;
		kring->ckr_max_monitors = 0;
		kring->ckr_n_monitors = 0;
	}
}
598 
599 static int
nx_mon_na_krings_locks(struct nexus_adapter * na,uint32_t qfirst[NR_TXRX],uint32_t qlast[NR_TXRX])600 nx_mon_na_krings_locks(struct nexus_adapter *na,
601     uint32_t qfirst[NR_TXRX], uint32_t qlast[NR_TXRX])
602 {
603 	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
604 	struct nexus_adapter *pna = mna->mna_pna;
605 	enum txrx t;
606 	int err = 0;
607 
608 	for_rx_tx(t) {
609 		uint32_t i;
610 
611 		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
612 			continue;
613 		}
614 
615 		qfirst[t] = qlast[t] = mna->mna_first[t];
616 
617 		/* synchronize with concurrently running nm_sync()s */
618 		for (i = mna->mna_first[t]; i < mna->mna_last[t]; i++) {
619 			struct __kern_channel_ring *kring;
620 
621 			/* the parent adapter's kring */
622 			kring = &NAKR(pna, t)[i];
623 			kr_stop(kring, KR_LOCKED);
624 			qlast[t] = i + 1;
625 		}
626 		if (err != 0) {
627 			break;
628 		}
629 	}
630 
631 	return err;
632 }
633 
634 static void
nx_mon_na_krings_unlock(struct nexus_adapter * na,const uint32_t qfirst[NR_TXRX],const uint32_t qlast[NR_TXRX])635 nx_mon_na_krings_unlock(struct nexus_adapter *na,
636     const uint32_t qfirst[NR_TXRX], const uint32_t qlast[NR_TXRX])
637 {
638 	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
639 	struct nexus_adapter *pna = mna->mna_pna;
640 	enum txrx t;
641 
642 	for_rx_tx(t) {
643 		uint32_t i;
644 
645 		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
646 			continue;
647 		}
648 
649 		/* synchronize with concurrently running nm_sync()s */
650 		for (i = qfirst[t]; i < qlast[t]; i++) {
651 			struct __kern_channel_ring *kring;
652 
653 			/* the parent adapter's kring */
654 			kring = &NAKR(pna, t)[i];
655 			kr_start(kring);
656 		}
657 	}
658 }
659 
/*
 * Attach this monitor to its parent ring(s): stop the selected parent
 * krings, pick the packet-copy routine matching the parent pool's
 * metadata type, hook each parent kring via nx_mon_add(), then mark
 * the adapter NAF_ACTIVE.  On failure the hooks are removed again.
 * The parent krings are restarted before returning.
 */
static int
nx_mon_enable(struct nexus_adapter *na, boolean_t zcopy)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	struct skmem_arena_nexus *na_arena = skmem_arena_nexus(pna->na_arena);
	uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
	enum txrx t;
	int err = 0;
	uint32_t i;

	ASSERT(!(na->na_flags & NAF_ACTIVE));

	bzero(&qfirst, sizeof(qfirst));
	bzero(&qlast, sizeof(qlast));

	/*
	 * Acquire the target kring(s).  q{first,last} represent the
	 * ranges that have been successfully acquired.  In the event
	 * the acquisition fails, we must release any previously-acquired
	 * rings.
	 */
	if ((err = nx_mon_na_krings_locks(na, qfirst, qlast)) != 0) {
		goto unlock;
	}

	/* choose copy routine based on the parent pool's metadata type */
	ASSERT(na_arena->arn_rx_pp == na_arena->arn_tx_pp);
	if (na_arena->arn_rx_pp->pp_max_frags > 1) {
		VERIFY(na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET);
		mna->mna_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
	} else {
		if (na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET) {
			mna->mna_pkt_copy_from_pkt = pkt_copy_from_pkt;
		} else {
			mna->mna_pkt_copy_from_pkt = nx_mon_quantum_copy_64x;
		}
	}

	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			err = nx_mon_add(mkring, kring, zcopy);
			if (err != 0) {
				break;
			}
		}
		if (err != 0) {
			break;
		}
	}

	if (err == 0) {
		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
		goto unlock;
	}

	/*
	 * Failure: undo the hooks installed above.
	 * NOTE(review): this pass walks the full acquired range, which
	 * includes rings where nx_mon_add() failed or was never reached —
	 * verify nx_mon_del() tolerates being called on such rings.
	 */
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			nx_mon_del(mkring, kring, FALSE);
		}
	}
	ASSERT(!(na->na_flags & NAF_ACTIVE));

unlock:
	nx_mon_na_krings_unlock(na, qfirst, qlast);

	SK_DF(err ? SK_VERB_ERROR : SK_VERB_MONITOR,
	    "%s (0x%llx): mode 0x%x txrings[%u,%u], rxrings[%u,%u] err %d",
	    na->na_name, SK_KVA(na), mna->mna_mode, qfirst[NR_TX], qlast[NR_TX],
	    qfirst[NR_RX], qlast[NR_RX], err);

	return err;
}
750 
/*
 * Detach this monitor from its parent ring(s): stop the parent krings,
 * clear the copy routine, unhook every monitored kring via
 * nx_mon_del(), clear NAF_ACTIVE, and restart the parent krings.
 * Counterpart of nx_mon_enable().
 */
static void
nx_mon_disable(struct nexus_adapter *na)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
	enum txrx t;
	int err;
	uint32_t i;

	ASSERT(na->na_flags & NAF_ACTIVE);

	bzero(&qfirst, sizeof(qfirst));
	bzero(&qlast, sizeof(qlast));

	/* blocking kring(s) acquisition; must not fail */
	err = nx_mon_na_krings_locks(na, qfirst, qlast);
	ASSERT(err == 0);
	mna->mna_pkt_copy_from_pkt = NULL;
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			nx_mon_del(mkring, kring, FALSE);
		}
	}
	atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);

	nx_mon_na_krings_unlock(na, qfirst, qlast);
}
787 
/*
 * Add the monitor mkring to the list of monitors of kring.
 * If this is the first monitor, intercept the callbacks:
 * the monitored ring's na_sync (and, for copy-mode rx, na_notify) are
 * redirected to the nx_mon_* parent hooks, with the originals saved in
 * ckr_mon_sync/ckr_mon_notify for nx_mon_del() to restore.
 * Returns 0 on success, or ENOMEM if the monitor array cannot grow.
 */
static int
nx_mon_add(struct __kern_channel_ring *mkring,
    struct __kern_channel_ring *kring, boolean_t zcopy)
{
	int error;

	/* make sure the monitor array exists and is big enough */
	error = nx_mon_kr_alloc(kring, kring->ckr_n_monitors + 1);
	if (error != 0) {
		return error;
	}

	/* append mkring and remember its slot for O(1) removal */
	kring->ckr_monitors[kring->ckr_n_monitors] = mkring;
	mkring->ckr_mon_pos = kring->ckr_n_monitors;
	kring->ckr_n_monitors++;
	if (kring->ckr_n_monitors == 1) {
		/* this is the first monitor, intercept callbacks */
		SK_DF(SK_VERB_MONITOR,
		    "mkr \"%s\" (0x%llx) krflags 0x%b intercept callbacks "
		    "on kr \"%s\" (0x%llx) krflags 0x%b", mkring->ckr_name,
		    SK_KVA(mkring), mkring->ckr_flags, CKRF_BITS,
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS);
		kring->ckr_mon_sync = kring->ckr_na_sync;
		/*
		 * zcopy monitors do not override nm_notify(), but
		 * we save the original one regardless, so that
		 * nx_mon_del() does not need to know the
		 * monitor type
		 */
		kring->ckr_mon_notify = kring->ckr_na_notify;
		if (kring->ckr_tx == NR_TX) {
			kring->ckr_na_sync =
			    (zcopy ? nx_mon_zcopy_parent_txsync :
			    nx_mon_parent_txsync);
		} else {
			kring->ckr_na_sync =
			    (zcopy ? nx_mon_zcopy_parent_rxsync :
			    nx_mon_parent_rxsync);
			if (!zcopy) {
				/* also intercept notify */
				kring->ckr_na_notify = nx_mon_parent_notify;
				kring->ckr_mon_tail = kring->ckr_ktail;
			}
		}
	} else {
		SK_DF(SK_VERB_MONITOR,
		    "mkr \"%s\" (0x%llx) krflags 0x%b already intercept "
		    "callbacks on kr \"%s\" (0x%llx) krflags 0x%b, "
		    "%u monitors", mkring->ckr_name, SK_KVA(mkring),
		    mkring->ckr_flags, CKRF_BITS, kring->ckr_name,
		    SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
		    kring->ckr_n_monitors);
	}
	return 0;
}
848 
/*
 * Remove the monitor mkring from the list of monitors of kring.
 * If this is the last monitor, restore the original callbacks.
 * Removal is O(1): the last array entry is swapped into the freed
 * slot and its ckr_mon_pos updated.  With all == TRUE the whole
 * array is dropped at once (entries are not individually unlinked).
 */
static void
nx_mon_del(struct __kern_channel_ring *mkring,
    struct __kern_channel_ring *kring, boolean_t all)
{
	ASSERT(kring->ckr_n_monitors != 0);
	if (all) {
		kring->ckr_n_monitors = 0;
	} else {
		kring->ckr_n_monitors--;
		/* swap the last entry into the vacated slot, if distinct */
		if (mkring->ckr_mon_pos != kring->ckr_n_monitors) {
			kring->ckr_monitors[mkring->ckr_mon_pos] =
			    kring->ckr_monitors[kring->ckr_n_monitors];
			kring->ckr_monitors[mkring->ckr_mon_pos]->ckr_mon_pos =
			    mkring->ckr_mon_pos;
		}
		kring->ckr_monitors[kring->ckr_n_monitors] = NULL;
	}
	if (kring->ckr_n_monitors == 0) {
		/*
		 * This was the last monitor, restore callbacks
		 * and delete monitor array.
		 */
		SK_DF(SK_VERB_MONITOR,
		    "restoring sync callback on kr \"%s\" (0x%llx) "
		    "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
		    kring->ckr_flags, CKRF_BITS);
		kring->ckr_na_sync = kring->ckr_mon_sync;
		kring->ckr_mon_sync = NULL;
		if (kring->ckr_tx == NR_RX) {
			SK_DF(SK_VERB_MONITOR,
			    "restoring notify callback on kr \"%s\" (0x%llx) "
			    "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
			    kring->ckr_flags, CKRF_BITS);
			kring->ckr_na_notify = kring->ckr_mon_notify;
			kring->ckr_mon_notify = NULL;
		}
		nx_mon_kr_dealloc(kring);
	} else {
		SK_DF(SK_VERB_MONITOR,
		    "NOT restoring callbacks on kr \"%s\" (0x%llx) "
		    "krflags 0x%b, %u monitors left", kring->ckr_name,
		    SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
		    kring->ckr_n_monitors);
	}
}
898 
899 /*
900  * This is called when the monitored adapter leaves skywalk mode (see
901  * na_unbind_channel).  We need to notify the monitors that the monitored
902  * rings are gone.  We do this by setting their mna->mna_pna to NULL.
903  * Note that the rings must be stopped when this happens, so no monitor
904  * ring callback can be active.
905  */
906 void
nx_mon_stop(struct nexus_adapter * na)907 nx_mon_stop(struct nexus_adapter *na)
908 {
909 	enum txrx t;
910 
911 	SK_LOCK_ASSERT_HELD();
912 
913 	/* skip if this adapter has no allocated rings */
914 	if (na->na_tx_rings == NULL) {
915 		return;
916 	}
917 
918 	na_disable_all_rings(na);
919 
920 	for_rx_tx(t) {
921 		uint32_t i;
922 
923 		for (i = 0; i < na_get_nrings(na, t); i++) {
924 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
925 			uint32_t j;
926 
927 			for (j = 0; j < kring->ckr_n_monitors; j++) {
928 				struct __kern_channel_ring *mkring =
929 				    kring->ckr_monitors[j];
930 				struct nexus_monitor_adapter *mna =
931 				    (struct nexus_monitor_adapter *)
932 				    KRNA(mkring);
933 
934 				/* forget about this adapter */
935 				if (mna->mna_pna != NULL) {
936 					ASSERT(na == mna->mna_pna);
937 					(void) na_release_locked(mna->mna_pna);
938 					mna->mna_pna = NULL;
939 				}
940 			}
941 
942 			/*
943 			 * Remove all monitors and restore callbacks;
944 			 * this is important for nexus adapters that
945 			 * are linked to one another, e.g. pipe, since
946 			 * the callback changes on one adapter affects
947 			 * its peer during sync times.
948 			 */
949 			if (kring->ckr_n_monitors > 0) {
950 				nx_mon_del(NULL, kring, TRUE);
951 			}
952 
953 			ASSERT(kring->ckr_monitors == NULL);
954 			ASSERT(kring->ckr_max_monitors == 0);
955 			ASSERT(kring->ckr_n_monitors == 0);
956 		}
957 	}
958 
959 	na_enable_all_rings(na);
960 }
961 
962 /*
963  * Common functions for the na_activate() callbacks of both kind of
964  * monitors.
965  */
966 static int
nx_mon_na_activate_common(struct nexus_adapter * na,na_activate_mode_t mode,boolean_t zcopy)967 nx_mon_na_activate_common(struct nexus_adapter *na, na_activate_mode_t mode,
968     boolean_t zcopy)
969 {
970 	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
971 	struct nexus_adapter *pna = mna->mna_pna;
972 	int err = 0;
973 
974 	ASSERT(na->na_type == NA_MONITOR);
975 
976 	SK_DF(SK_VERB_MONITOR, "na \"%s\" (0x%llx) %s zcopy %u", na->na_name,
977 	    SK_KVA(na), na_activate_mode2str(mode), zcopy);
978 
979 	switch (mode) {
980 	case NA_ACTIVATE_MODE_ON:
981 		if (pna == NULL) {
982 			/* parent left skywalk mode, fatal */
983 			SK_ERR("%s: internal error", na->na_name);
984 			err = ENXIO;
985 		} else {
986 			err = nx_mon_enable(na, zcopy);
987 		}
988 		break;
989 
990 	case NA_ACTIVATE_MODE_DEFUNCT:
991 		break;
992 
993 	case NA_ACTIVATE_MODE_OFF:
994 		if (pna == NULL) {
995 			SK_DF(SK_VERB_MONITOR, "%s: parent left skywalk mode, "
996 			    "nothing to restore", na->na_name);
997 		} else {
998 			nx_mon_disable(na);
999 		}
1000 		break;
1001 
1002 	default:
1003 		VERIFY(0);
1004 		/* NOTREACHED */
1005 		__builtin_unreachable();
1006 	}
1007 
1008 	return err;
1009 }
1010 
1011 /*
1012  * Functions specific for zero-copy monitors.
1013  */
1014 
1015 /*
1016  * Common function for both zero-copy tx and rx nm_sync()
1017  * callbacks
1018  */
1019 static int
nx_mon_zcopy_parent_sync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags,enum txrx tx)1020 nx_mon_zcopy_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
1021     uint32_t flags, enum txrx tx)
1022 {
1023 	struct __kern_channel_ring *mkring = kring->ckr_monitors[0];
1024 	int rel_slots, free_slots, busy, sent = 0;
1025 	slot_idx_t beg, end, i;
1026 	const slot_idx_t lim = kring->ckr_lim;
1027 	const slot_idx_t mlim;
1028 	int error = 0;
1029 
1030 	if (mkring == NULL) {
1031 		SK_RD(5, "NULL monitor on kr \"%s\" (0x%llx) krflags 0x%b",
1032 		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1033 		    CKRF_BITS);
1034 		return 0;
1035 	}
1036 
1037 	ASSERT(!KR_KERNEL_ONLY(kring));
1038 	ASSERT(!KR_KERNEL_ONLY(mkring));
1039 
1040 	/* deconst */
1041 	*(slot_idx_t *)(uintptr_t)&mlim = mkring->ckr_lim;
1042 
1043 	/* get the relased slots (rel_slots) */
1044 	if (tx == NR_TX) {
1045 		beg = kring->ckr_ktail;
1046 		error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1047 		if (error) {
1048 			return error;
1049 		}
1050 		end = kring->ckr_ktail;
1051 	} else { /* NR_RX */
1052 		beg = kring->ckr_khead;
1053 		end = kring->ckr_rhead;
1054 	}
1055 
1056 	rel_slots = end - beg;
1057 	if (rel_slots < 0) {
1058 		rel_slots += kring->ckr_num_slots;
1059 	}
1060 
1061 	if (!rel_slots) {
1062 		/*
1063 		 * No released slots, but we still need
1064 		 * to call rxsync if this is a rx ring
1065 		 */
1066 		goto out_rxsync;
1067 	}
1068 
1069 	/*
1070 	 * We need to lock the monitor receive ring, since it
1071 	 * is the target of bot tx and rx traffic from the monitored
1072 	 * adapter
1073 	 */
1074 	KR_LOCK(mkring);
1075 	/* get the free slots available on the monitor ring */
1076 	i = mkring->ckr_ktail;
1077 	busy = i - mkring->ckr_khead;
1078 	if (busy < 0) {
1079 		busy += mkring->ckr_num_slots;
1080 	}
1081 	free_slots = mlim - busy;
1082 
1083 	if (!free_slots) {
1084 		goto out;
1085 	}
1086 
1087 	/* swap min(free_slots, rel_slots) slots */
1088 	if (free_slots < rel_slots) {
1089 		beg += (rel_slots - free_slots);
1090 		if (beg >= kring->ckr_num_slots) {
1091 			beg -= kring->ckr_num_slots;
1092 		}
1093 		rel_slots = free_slots;
1094 	}
1095 
1096 	sent = rel_slots;
1097 	for (; rel_slots; rel_slots--) {
1098 		/*
1099 		 * Swap the slots.
1100 		 *
1101 		 * XXX: [email protected] -- this bypasses the slot attach/detach
1102 		 * interface, and needs to be changed when monitor adopts the
1103 		 * packet APIs.  SD_SWAP() will perform a block copy of the
1104 		 * swap, and will readjust the kernel slot descriptor's sd_user
1105 		 * accordingly.
1106 		 */
1107 		SD_SWAP(KR_KSD(mkring, i), KR_USD(mkring, i),
1108 		    KR_KSD(kring, beg), KR_USD(kring, beg));
1109 
1110 		SK_RD(5, "beg %u buf_idx %u", beg,
1111 		    METADATA_IDX(KR_KSD(kring, beg)->sd_qum));
1112 
1113 		beg = SLOT_NEXT(beg, lim);
1114 		i = SLOT_NEXT(i, mlim);
1115 	}
1116 	membar_sync();
1117 	mkring->ckr_ktail = i;
1118 
1119 out:
1120 	KR_UNLOCK(mkring);
1121 
1122 	if (sent) {
1123 		/* notify the new frames to the monitor */
1124 		(void) mkring->ckr_na_notify(mkring, p, 0);
1125 	}
1126 
1127 out_rxsync:
1128 	if (tx == NR_RX) {
1129 		error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1130 	}
1131 
1132 	return error;
1133 }
1134 
1135 /*
1136  * Callback used to replace the ckr_na_sync callback in the monitored tx rings.
1137  */
1138 static int
nx_mon_zcopy_parent_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1139 nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
1140     uint32_t flags)
1141 {
1142 	SK_DF(SK_VERB_MONITOR,
1143 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1144 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1145 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1146 	return nx_mon_zcopy_parent_sync(kring, p, flags, NR_TX);
1147 }
1148 
1149 /* callback used to replace the nm_sync callback in the monitored rx rings */
1150 static int
nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1151 nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1152     uint32_t flags)
1153 {
1154 	SK_DF(SK_VERB_MONITOR,
1155 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1156 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1157 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1158 	return nx_mon_zcopy_parent_sync(kring, p, flags, NR_RX);
1159 }
1160 
1161 static int
nx_mon_zcopy_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1162 nx_mon_zcopy_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1163 {
1164 	return nx_mon_na_activate_common(na, mode, TRUE /* zcopy */);
1165 }
1166 
1167 /* na_dtor callback for monitors */
1168 static void
nx_mon_zcopy_na_dtor(struct nexus_adapter * na)1169 nx_mon_zcopy_na_dtor(struct nexus_adapter *na)
1170 {
1171 	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1172 	struct nexus_adapter *pna = mna->mna_pna;
1173 
1174 	SK_LOCK_ASSERT_HELD();
1175 	ASSERT(na->na_type == NA_MONITOR);
1176 
1177 	if (pna != NULL) {
1178 		(void) na_release_locked(pna);
1179 		mna->mna_pna = NULL;
1180 	}
1181 }
1182 
1183 /*
1184  * Functions specific for copy monitors.
1185  */
1186 
/*
 * Copy 'new_slots' slots of kring, starting at 'first_new', into every
 * monitor ring attached to kring.  Used by the copy-monitor tx/rx sync
 * callbacks.  Slots that do not fit in a monitor ring are dropped
 * (oldest first); slots without valid metadata are skipped.
 */
static void
nx_mon_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
    slot_idx_t first_new, int new_slots)
{
	nexus_meta_type_t md_type = KRNA(kring)->na_md_type;
	uint32_t j;

	for (j = 0; j < kring->ckr_n_monitors; j++) {
		struct __kern_channel_ring *mkring = kring->ckr_monitors[j];
		slot_idx_t i, mlim, beg;
		int free_slots, busy, sent = 0, m;
		const slot_idx_t lim = kring->ckr_lim;
		struct nexus_adapter *dst_na = KRNA(mkring);
		struct nexus_monitor_adapter *mna =
		    (struct nexus_monitor_adapter *)dst_na;
		/* largest payload a destination packet can hold */
		uint32_t max_len = mkring->ckr_pp->pp_max_frags *
		    mkring->ckr_pp->pp_buflet_size;

		/*
		 * src and dst adapters must share the same nexus;
		 * this test is done in nx_monitor_na_find().  This
		 * covers both buffer and metadata sizes.
		 */

		mlim = mkring->ckr_lim;

		/*
		 * We need to lock the monitor receive ring, since it
		 * is the target of both tx and rx traffics from the
		 * monitored adapter.
		 */
		KR_LOCK(mkring);
		/* get the free slots available on the monitor ring */
		i = mkring->ckr_ktail;
		busy = i - mkring->ckr_khead;
		if (busy < 0) {
			busy += mkring->ckr_num_slots;
		}
		free_slots = mlim - busy;

		if (!free_slots) {
			goto out;
		}

		/* copy min(free_slots, new_slots) slots */
		m = new_slots;
		beg = first_new;
		if (free_slots < m) {
			/* not enough room: skip (drop) the oldest new slots */
			beg += (m - free_slots);
			if (beg >= kring->ckr_num_slots) {
				beg -= kring->ckr_num_slots;
			}
			m = free_slots;
		}

		ASSERT(KRNA(mkring)->na_md_type == md_type);

		for (; m; m--) {
			struct __kern_slot_desc *src_sd = KR_KSD(kring, beg);
			struct __kern_slot_desc *dst_sd = KR_KSD(mkring, i);
			struct __kern_packet *spkt, *dpkt;
			kern_packet_t sph, dph;
			uint32_t copy_len;

			if (!KSD_VALID_METADATA(src_sd)) {
				/* no packet here; advance source index only */
				goto skip;
			}

			/* retrieve packet handles from slot */
			spkt = src_sd->sd_pkt;
			sph = SK_PTR_ENCODE(spkt, METADATA_TYPE(spkt),
			    METADATA_SUBTYPE(spkt));
			dpkt = dst_sd->sd_pkt;
			dph = SK_PTR_ENCODE(dpkt, METADATA_TYPE(dpkt),
			    METADATA_SUBTYPE(dpkt));

			ASSERT(METADATA_TYPE(spkt) == METADATA_TYPE(dpkt));

			/* guard the round-up below against overflow */
			ASSERT(spkt->pkt_qum.qum_len <= (UINT32_MAX - 63));
			copy_len = spkt->pkt_qum.qum_len;

			/* round to a multiple of 64 */
			copy_len = (copy_len + 63) & ~63;

			if (__improbable(copy_len > max_len)) {
				/* destination too small; truncate the copy */
				SK_RD(5, "kr \"%s\" -> mkr \"%s\": "
				    "truncating %u to %u",
				    kring->ckr_name, mkring->ckr_name,
				    (uint32_t)copy_len, max_len);
				copy_len = max_len;
			}

			/* copy buffers */
			mna->mna_pkt_copy_from_pkt(kring->ckr_tx, dph, 0, sph,
			    0, copy_len, FALSE, 0, 0, FALSE);

			/* copy the associated meta data */
			_QUM_COPY(&(spkt)->pkt_qum, &(dpkt)->pkt_qum);
			if (md_type == NEXUS_META_TYPE_PACKET) {
				_PKT_COPY(spkt, dpkt);
				ASSERT(dpkt->pkt_mbuf == NULL);
			}

			ASSERT(!(dpkt->pkt_qum.qum_qflags & QUM_F_KERNEL_ONLY) ||
			    PP_KERNEL_ONLY(dpkt->pkt_qum.qum_pp));

			sent++;
			i = SLOT_NEXT(i, mlim);
skip:
			beg = SLOT_NEXT(beg, lim);
		}
		/* make the copies visible before publishing the new ktail */
		membar_sync();
		mkring->ckr_ktail = i;
out:
		KR_UNLOCK(mkring);

		if (sent) {
			/* notify the new frames to the monitor */
			(void) mkring->ckr_na_notify(mkring, p, 0);
		}
	}
}
1309 
/*
 * callback used to replace the nm_sync callback in the monitored tx rings
 *
 * Copies the slots newly submitted by the user ([khead, rhead)) into the
 * monitor rings, then runs the parent's original txsync.
 */
static int
nx_mon_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	slot_idx_t first_new;
	int new_slots;
	nexus_type_t nx_type =
	    kring->ckr_na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;

	/*
	 * For user pipe nexus, txsync can also be initated from RX process
	 * context, hence user pipe tx ring should be accessed holding
	 * ckr_qlock.
	 */
	if (nx_type == NEXUS_TYPE_USER_PIPE) {
		KR_LOCK(kring);
	}

	/* get the new slots */
	first_new = kring->ckr_khead;
	new_slots = kring->ckr_rhead - first_new;
	if (new_slots < 0) {
		/* index wrapped around the ring */
		new_slots += kring->ckr_num_slots;
	}
	if (new_slots) {
		nx_mon_parent_sync(kring, p, first_new, new_slots);
	}

	if (nx_type == NEXUS_TYPE_USER_PIPE) {
		KR_UNLOCK(kring);
	}

	/* finally, run the original txsync on behalf of the caller */
	return kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
}
1345 
1346 /* callback used to replace the nm_sync callback in the monitored rx rings */
1347 static int
nx_mon_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1348 nx_mon_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1349     uint32_t flags)
1350 {
1351 	slot_idx_t first_new;
1352 	int new_slots, error;
1353 
1354 	/* get the new slots */
1355 	error =  kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1356 	if (error) {
1357 		return error;
1358 	}
1359 	first_new = kring->ckr_mon_tail;
1360 	new_slots = kring->ckr_ktail - first_new;
1361 	if (new_slots < 0) {
1362 		new_slots += kring->ckr_num_slots;
1363 	}
1364 	if (new_slots) {
1365 		nx_mon_parent_sync(kring, p, first_new, new_slots);
1366 	}
1367 	kring->ckr_mon_tail = kring->ckr_ktail;
1368 	return 0;
1369 }
1370 
1371 /*
1372  * Callback used to replace the nm_notify() callback in the monitored rx rings
1373  */
1374 static int
nx_mon_parent_notify(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1375 nx_mon_parent_notify(struct __kern_channel_ring *kring, struct proc *p,
1376     uint32_t flags)
1377 {
1378 	int err = 0;
1379 	sk_protect_t protect = NULL;
1380 
1381 	SK_DF(SK_VERB_MONITOR | SK_VERB_NOTIFY |
1382 	    ((kring->ckr_tx == NR_TX) ? SK_VERB_TX : SK_VERB_RX),
1383 	    "kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x", kring->ckr_name,
1384 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1385 	/*
1386 	 * ?xsync callbacks have tryget called by their callers,
1387 	 * but here we have to call it by ourself.  If we can't
1388 	 * acquire the exclusive sync right, skip the sync.
1389 	 */
1390 	if ((err = kr_enter(kring, FALSE)) == 0) {
1391 		protect = sk_sync_protect();
1392 		nx_mon_parent_rxsync(kring, p, NA_SYNCF_FORCE_READ);
1393 		sk_sync_unprotect(protect);
1394 		kr_exit(kring);
1395 	}
1396 	/* in all cases (even error), we must invoke notify */
1397 	kring->ckr_mon_notify(kring, p, (NA_NOTEF_MONITOR | flags));
1398 	return err;
1399 }
1400 
1401 static int
nx_mon_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1402 nx_mon_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1403 {
1404 	return nx_mon_na_activate_common(na, mode, FALSE /* no zcopy */);
1405 }
1406 
1407 static void
nx_mon_na_dtor(struct nexus_adapter * na)1408 nx_mon_na_dtor(struct nexus_adapter *na)
1409 {
1410 	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1411 	struct nexus_adapter *pna = mna->mna_pna;
1412 
1413 	SK_LOCK_ASSERT_HELD();
1414 	ASSERT(na->na_type == NA_MONITOR);
1415 
1416 	if (pna != NULL) {
1417 		(void) na_release_locked(pna);
1418 		mna->mna_pna = NULL;
1419 	}
1420 }
1421 
/*
 * check if chr is a request for a monitor adapter that we can satisfy
 *
 * On success (*na != NULL) the caller receives a retained reference to a
 * newly built monitor adapter; the reference to the parent adapter is
 * kept in mna->mna_pna.  Returns 0 when the request is not a monitor
 * request (and leaves *na NULL), or an errno on failure.
 */
int
nx_monitor_na_find(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct kern_channel *ch0, struct nxbind *nxb,
    struct proc *p, struct nexus_adapter **na, boolean_t create)
{
#pragma unused(ch)
	boolean_t zcopy = !!(chr->cr_mode & CHMODE_MONITOR_NO_COPY);
	struct nexus_adapter *pna = NULL; /* parent adapter */
	struct nexus_monitor_adapter *mna = NULL;
	char monsuff[10] = "";
	struct chreq pchr;
	uint32_t i;
	int error;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();
	*na = NULL;

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
	    "ring_id %d ring_set %u ep_type %u:%u ch0 0x%llx create %u%s",
	    chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
	    (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
	    chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
	    chr->cr_real_endpoint, chr->cr_endpoint, SK_KVA(ch0), create,
	    !(chr->cr_mode & CHMODE_MONITOR) ? " (skipped)" : "");
#endif /* SK_LOG */

	/* not a monitor request; nothing for us to do */
	if (!(chr->cr_mode & CHMODE_MONITOR)) {
		return 0;
	}

	/* XXX: Don't allow user packet pool mode in monitor for now */
	if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
		SK_ERR("User Packet pool mode not supported for monitor");
		return ENOTSUP;
	}

	mna = na_mon_alloc(Z_WAITOK);

	ASSERT(mna->mna_up.na_type == NA_MONITOR);
	ASSERT(mna->mna_up.na_free == na_mon_free);

	/* override the ring set since we're monitoring */
	chr->cr_ring_set = RING_SET_ALL;

	if (ch0 != NULL) {
		/*
		 * We've been given the owning channel from ch_open();
		 * use this as shortcut since otherwise we'd have to
		 * find it ourselves.
		 */
#if (DEBUG || DEVELOPMENT)
		ASSERT(!(ch0->ch_info->cinfo_ch_mode & CHMODE_MONITOR));
		ASSERT(ch0->ch_info->cinfo_nx_port == chr->cr_port);
#endif /* DEBUG || DEVELOPMENT */
		pna = ch0->ch_na;
		na_retain_locked(pna);
	} else {
		/*
		 * First, try to find the adapter that we want to monitor
		 * We use the same chr, after we have turned off the monitor
		 * flags.  In this way we can potentially monitor everything
		 * skywalk understands, except other monitors.
		 */
		memcpy(&pchr, chr, sizeof(pchr));
		pchr.cr_mode &= ~CHMODE_MONITOR;
		error = na_find(ch, nx, &pchr, ch0, nxb, p, &pna, create);
		if (error != 0) {
			SK_ERR("parent lookup failed: %d", error);
			/* mna is leaked here?  NOTE(review): the other error
			 * paths free mna via put_out; confirm intentional */
			return error;
		}
	}
	/* either path above leaves us with a retained parent reference */
	ASSERT(pna != NULL);
	SK_DF(SK_VERB_MONITOR,
	    "found parent: \"%s\" (0x%llx)", pna->na_name, SK_KVA(pna));

	if (!NA_IS_ACTIVE(pna)) {
		/* parent not in skywalk mode */
		/*
		 * XXX we can wait for the parent to enter skywalk mode,
		 * by intercepting its na_activate() callback (2014-03-16)
		 */
		SK_ERR("parent \"%s\" (0x%llx) not in skywalk mode",
		    pna->na_name, SK_KVA(pna));
		error = ENXIO;
		goto put_out;
	} else if (zcopy && NA_KERNEL_ONLY(pna)) {
		/*
		 * Zero-copy mode requires the parent adapter to be
		 * created in a non-kernel-only mode.
		 */
		SK_ERR("parent \"%s\" (0x%llx) is in kernel-only mode",
		    pna->na_name, SK_KVA(pna));
		error = ENODEV;
		goto put_out;
	}

	/* grab all the rings we need in the parent */
	mna->mna_pna = pna;
	error = na_interp_ringid(pna, chr->cr_ring_id, chr->cr_ring_set,
	    mna->mna_first, mna->mna_last);
	if (error != 0) {
		SK_ERR("ring_mode %u ring_id %d error %d", chr->cr_ring_set,
		    (int)chr->cr_ring_id, error);
		goto put_out;
	}
	/* single-ring monitor: append "-<ring>" to the adapter name */
	if (mna->mna_last[NR_TX] - mna->mna_first[NR_TX] == 1) {
		(void) snprintf(monsuff, 10, "-%u", mna->mna_first[NR_TX]);
	}
	(void) snprintf(mna->mna_up.na_name, sizeof(mna->mna_up.na_name),
	    "%s%s/%s%s%s", pna->na_name, monsuff, zcopy ? "z" : "",
	    (chr->cr_mode & CHMODE_MONITOR_TX) ? "r" : "",
	    (chr->cr_mode & CHMODE_MONITOR_RX) ? "t" : "");
	uuid_generate_random(mna->mna_up.na_uuid);

	/* these don't apply to the monitor adapter */
	*(nexus_stats_type_t *)(uintptr_t)&mna->mna_up.na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;
	*(uint32_t *)(uintptr_t)&mna->mna_up.na_flowadv_max = 0;

	if (zcopy) {
		/*
		 * Zero copy monitors need exclusive access
		 * to the monitored rings.
		 */
		for_rx_tx(t) {
			if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
				continue;
			}
			for (i = mna->mna_first[t];
			    i < mna->mna_last[t]; i++) {
				struct __kern_channel_ring *kring =
				    &NAKR(pna, t)[i];
				if (kring->ckr_n_monitors > 0) {
					error = EBUSY;
					SK_ERR("kr \"%s\" already monitored "
					    "by \"%s\"", kring->ckr_name,
					    kring->ckr_monitors[0]->ckr_name);
					goto put_out;
				}
			}
		}
		mna->mna_up.na_activate = nx_mon_zcopy_na_activate;
		mna->mna_up.na_dtor = nx_mon_zcopy_na_dtor;
		/*
		 * To have zero copy, we need to use the same memory allocator
		 * as the monitored port.
		 */
		mna->mna_up.na_arena = pna->na_arena;
		skmem_arena_retain((&mna->mna_up)->na_arena);
		atomic_bitset_32(&mna->mna_up.na_flags, NAF_MEM_LOANED);
	} else {
		/* normal monitors are incompatible with zero copy ones */
		for_rx_tx(t) {
			if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
				continue;
			}
			for (i = mna->mna_first[t];
			    i < mna->mna_last[t]; i++) {
				struct __kern_channel_ring *kring =
				    &NAKR(pna, t)[i];
				/* a zcopy monitor already owns this ring? */
				if (kring->ckr_n_monitors > 0 &&
				    KRNA(kring->ckr_monitors[0])->
				    na_activate == nx_mon_zcopy_na_activate) {
					error = EBUSY;
					SK_ERR("kr \"%s\" is busy (zcopy)",
					    kring->ckr_name);
					goto put_out;
				}
			}
		}
		mna->mna_up.na_activate = nx_mon_na_activate;
		mna->mna_up.na_dtor = nx_mon_na_dtor;
		/*
		 * allocate a new (private) allocator instance using the
		 * parent nexus configuration.
		 */
		if ((error = nx_monitor_prov_s.nxdom_prov_mem_new(
			    NX_DOM_PROV(nx), nx, &mna->mna_up)) != 0) {
			ASSERT(mna->mna_up.na_arena == NULL);
			goto put_out;
		}
		ASSERT(mna->mna_up.na_arena != NULL);
		/* NOTE(review): redundant; also set unconditionally below */
		mna->mna_up.na_rxsync = nx_mon_na_rxsync;
	}
	/* metadata type/subtype must match the parent for copies/swaps */
	*(nexus_meta_type_t *)(uintptr_t)&mna->mna_up.na_md_type =
	    pna->na_md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&mna->mna_up.na_md_subtype =
	    pna->na_md_subtype;

	/* a do-nothing txsync: monitors cannot be used to inject packets */
	mna->mna_up.na_txsync = nx_mon_na_txsync;
	mna->mna_up.na_rxsync = nx_mon_na_rxsync;
	mna->mna_up.na_krings_create = nx_mon_na_krings_create;
	mna->mna_up.na_krings_delete = nx_mon_na_krings_delete;

	/*
	 * We set the number of our na_rx_rings to be
	 * max(na_num_tx_rings, na_num_rx_rings) in the parent
	 */
	na_set_nrings(&mna->mna_up, NR_TX, na_get_nrings(pna, NR_TX));
	na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_RX));
	if (na_get_nrings(pna, NR_TX) > na_get_nrings(pna, NR_RX)) {
		na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_TX));
	}
	na_set_nslots(&mna->mna_up, NR_TX, na_get_nslots(pna, NR_TX));
	na_set_nslots(&mna->mna_up, NR_RX, na_get_nslots(pna, NR_RX));

	na_attach_common(&mna->mna_up, nx, &nx_monitor_prov_s);

	/* remember the traffic directions we have to monitor */
	mna->mna_mode = (chr->cr_mode & CHMODE_MONITOR);

	/* keep the reference to the parent */
	*na = &mna->mna_up;
	na_retain_locked(*na);

	/* sanity check: monitor and monitored adapters must share the nexus */
	ASSERT((*na)->na_nx == pna->na_nx);

#if SK_LOG
	SK_DF(SK_VERB_MONITOR, "created monitor adapter 0x%llx", SK_KVA(mna));
	SK_DF(SK_VERB_MONITOR, "na_name: \"%s\"", mna->mna_up.na_name);
	SK_DF(SK_VERB_MONITOR, "  UUID:         %s",
	    sk_uuid_unparse(mna->mna_up.na_uuid, uuidstr));
	SK_DF(SK_VERB_MONITOR, "  nx:           0x%llx (\"%s\":\"%s\")",
	    SK_KVA(mna->mna_up.na_nx), NX_DOM(mna->mna_up.na_nx)->nxdom_name,
	    NX_DOM_PROV(mna->mna_up.na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_MONITOR, "  flags:        0x%b",
	    mna->mna_up.na_flags, NAF_BITS);
	SK_DF(SK_VERB_MONITOR, "  rings:        tx %u rx %u",
	    na_get_nrings(&mna->mna_up, NR_TX),
	    na_get_nrings(&mna->mna_up, NR_RX));
	SK_DF(SK_VERB_MONITOR, "  slots:        tx %u rx %u",
	    na_get_nslots(&mna->mna_up, NR_TX),
	    na_get_nslots(&mna->mna_up, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_DF(SK_VERB_MONITOR, "  next_pipe:    %u", mna->mna_up.na_next_pipe);
	SK_DF(SK_VERB_MONITOR, "  max_pipes:    %u", mna->mna_up.na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
	SK_DF(SK_VERB_MONITOR, "  mna_tx_rings: [%u,%u)", mna->mna_first[NR_TX],
	    mna->mna_last[NR_TX]);
	SK_DF(SK_VERB_MONITOR, "  mna_rx_rings: [%u,%u)", mna->mna_first[NR_RX],
	    mna->mna_last[NR_RX]);
	SK_DF(SK_VERB_MONITOR, "  mna_mode:     %u", mna->mna_mode);
#endif /* SK_LOG */

	return 0;

put_out:
	/* release the parent reference and free the half-built monitor */
	if (pna != NULL) {
		(void) na_release_locked(pna);
		pna = NULL;
	}
	NA_FREE(&mna->mna_up);
	return error;
}
1682 
/*
 * Copy routine used by quantum-metadata monitors: copies 'len' bytes
 * (len must be a multiple of 64) from the source quantum's buflet to
 * the destination's, then records the length in the destination
 * metadata.  Extra parameters exist only for prototype parity with
 * pkt_copy_from_pkt_t and are ignored.
 */
static void
nx_mon_quantum_copy_64x(const enum txrx t, kern_packet_t dph,
    const uint16_t doff, kern_packet_t sph, const uint16_t soff,
    const uint32_t len, const boolean_t unused_arg1,
    const uint16_t unused_arg2, const uint16_t unused_arg3,
    const boolean_t unused_arg4)
{
	/* for function prototype parity with pkt_copy_from_pkt_t */
#pragma unused(unused_arg1, unused_arg2, unused_arg3, unused_arg4)
#pragma unused(t, doff, soff)
	struct __kern_quantum *dqum = SK_PTR_ADDR_KQUM(dph);
	struct __kern_quantum *squm = SK_PTR_ADDR_KQUM(sph);
	uint8_t *sbuf, *dbuf;

	ASSERT(METADATA_TYPE(squm) == NEXUS_META_TYPE_QUANTUM);
	ASSERT(METADATA_TYPE(squm) == METADATA_TYPE(dqum));
	VERIFY(IS_P2ALIGNED(len, 64));

	MD_BUFLET_ADDR(squm, sbuf);
	MD_BUFLET_ADDR(dqum, dbuf);
	VERIFY(IS_P2ALIGNED(dbuf, sizeof(uint64_t)));

	/* use the 64-byte-chunk copy only when the source is 8-byte aligned */
	if (__probable(IS_P2ALIGNED(sbuf, sizeof(uint64_t)))) {
		sk_copy64_64x((uint64_t *)(void *)sbuf,
		    (uint64_t *)(void *)dbuf, len);
	} else {
		bcopy(sbuf, dbuf, len);
	}
	/*
	 * This copy routine only copies to/from a buflet, so the length
	 * is guaranteed be <= the size of a buflet.
	 */
	VERIFY(len <= UINT16_MAX);
	METADATA_SET_LEN(dqum, (uint16_t)len, 0);
}
1718