1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 */
53
54 /*
55 * $FreeBSD$
56 *
57 * Monitors
58 *
59 * netmap monitors can be used to do monitoring of network traffic
60 * on another adapter, when the latter adapter is working in netmap mode.
61 *
62 * Monitors offer to userspace the same interface as any other netmap port,
63 * with as many pairs of netmap rings as the monitored adapter.
64 * However, only the rx rings are actually used. Each monitor rx ring receives
65 * the traffic transiting on both the tx and rx corresponding rings in the
66 * monitored adapter. During registration, the user can choose if she wants
67 * to intercept tx only, rx only, or both tx and rx traffic.
68 *
69 * If the monitor is not able to cope with the stream of frames, excess traffic
70 * will be dropped.
71 *
72 * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
73 *
74 * Monitors can be either zero-copy or copy-based.
75 *
76 * Copy monitors see the frames before they are consumed:
77 *
78 * - For tx traffic, this is when the application sends them, before they are
79 * passed down to the adapter.
80 *
81 * - For rx traffic, this is when they are received by the adapter, before
82 * they are sent up to the application, if any (note that, if no
83 * application is reading from a monitored ring, the ring will eventually
84 * fill up and traffic will stop).
85 *
86 * Zero-copy monitors only see the frames after they have been consumed:
87 *
88 * - For tx traffic, this is after the slots containing the frames have been
89 * marked as free. Note that this may happen at a considerably delay after
90 * frame transmission, since freeing of slots is often done lazily.
91 *
92 * - For rx traffic, this is after the consumer on the monitored adapter
93 * has released them. In most cases, the consumer is a userspace
94 * application which may have modified the frame contents.
95 *
96 * Several copy monitors may be active on any ring. Zero-copy monitors,
97 * instead, need exclusive access to each of the monitored rings. This may
98 * change in the future, if we implement zero-copy monitor chaining.
99 *
100 */
101
102 #include <skywalk/os_skywalk_private.h>
103 #include <skywalk/nexus/monitor/nx_monitor.h>
104
105 static int nx_mon_na_txsync(struct __kern_channel_ring *, struct proc *,
106 uint32_t);
107 static int nx_mon_na_rxsync(struct __kern_channel_ring *, struct proc *,
108 uint32_t);
109 static int nx_mon_na_krings_create(struct nexus_adapter *,
110 struct kern_channel *);
111 static void nx_mon_na_krings_delete(struct nexus_adapter *,
112 struct kern_channel *, boolean_t);
113 static uint32_t nx_mon_txrx2chmode(enum txrx);
114 static int nx_mon_kr_alloc(struct __kern_channel_ring *, uint32_t);
115 static void nx_mon_kr_dealloc(struct __kern_channel_ring *);
116 static int nx_mon_na_krings_locks(struct nexus_adapter *,
117 uint32_t[NR_TXRX], uint32_t[NR_TXRX]);
118 static void nx_mon_na_krings_unlock(struct nexus_adapter *,
119 const uint32_t[NR_TXRX], const uint32_t[NR_TXRX]);
120 static int nx_mon_enable(struct nexus_adapter *, int);
121 static void nx_mon_disable(struct nexus_adapter *);
122 static int nx_mon_add(struct __kern_channel_ring *,
123 struct __kern_channel_ring *, boolean_t);
124 static void nx_mon_del(struct __kern_channel_ring *,
125 struct __kern_channel_ring *, boolean_t);
126 static int nx_mon_na_activate_common(struct nexus_adapter *,
127 na_activate_mode_t, boolean_t);
128 static pkt_copy_from_pkt_t nx_mon_quantum_copy_64x;
129
130 static int nx_mon_zcopy_parent_sync(struct __kern_channel_ring *,
131 struct proc *, uint32_t, enum txrx);
132 static int nx_mon_zcopy_na_activate(struct nexus_adapter *, na_activate_mode_t);
133 static void nx_mon_zcopy_na_dtor(struct nexus_adapter *);
134
135 static void nx_mon_parent_sync(struct __kern_channel_ring *, struct proc *,
136 slot_idx_t, int);
137 static int nx_mon_na_activate(struct nexus_adapter *, na_activate_mode_t);
138 static void nx_mon_na_dtor(struct nexus_adapter *);
139
140 /*
141 * monitors work by replacing the nm_sync() and possibly the
142 * nm_notify() callbacks in the monitored rings.
143 */
144 static int nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *,
145 struct proc *, uint32_t);
146 static int nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *,
147 struct proc *, uint32_t);
148 static int nx_mon_parent_txsync(struct __kern_channel_ring *,
149 struct proc *, uint32_t);
150 static int nx_mon_parent_rxsync(struct __kern_channel_ring *,
151 struct proc *, uint32_t);
152 static int nx_mon_parent_notify(struct __kern_channel_ring *,
153 struct proc *, uint32_t);
154
155 static void nx_mon_dom_init(struct nxdom *);
156 static void nx_mon_dom_terminate(struct nxdom *);
157 static void nx_mon_dom_fini(struct nxdom *);
158 static int nx_mon_dom_bind_port(struct kern_nexus *, nexus_port_t *,
159 struct nxbind *, void *);
160 static int nx_mon_dom_unbind_port(struct kern_nexus *, nexus_port_t);
161 static int nx_mon_dom_connect(struct kern_nexus_domain_provider *,
162 struct kern_nexus *, struct kern_channel *, struct chreq *,
163 struct kern_channel *, struct nxbind *, struct proc *);
164 static void nx_mon_dom_disconnect(struct kern_nexus_domain_provider *,
165 struct kern_nexus *, struct kern_channel *);
166 static void nx_mon_dom_defunct(struct kern_nexus_domain_provider *,
167 struct kern_nexus *, struct kern_channel *, struct proc *);
168 static void nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *,
169 struct kern_nexus *, struct kern_channel *, boolean_t);
170
171 static int nx_mon_prov_init(struct kern_nexus_domain_provider *);
172 static int nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *,
173 const struct nxprov_params *, struct nxprov_adjusted_params *);
174 static int nx_mon_prov_params(struct kern_nexus_domain_provider *,
175 const uint32_t, const struct nxprov_params *, struct nxprov_params *,
176 struct skmem_region_params[SKMEM_REGIONS]);
177 static int nx_mon_prov_mem_new(struct kern_nexus_domain_provider *,
178 struct kern_nexus *, struct nexus_adapter *);
179 static void nx_mon_prov_fini(struct kern_nexus_domain_provider *);
180
181 static struct nexus_monitor_adapter *na_mon_alloc(zalloc_flags_t);
182 static void na_mon_free(struct nexus_adapter *);
183
/*
 * Domain descriptor for the monitor nexus type.  Registered with the
 * skywalk domain framework via nx_mon_dom_init(); since a monitor is
 * never usable standalone, most bounds below are placeholder non-zeroes.
 */
struct nxdom nx_monitor_dom_s = {
	.nxdom_prov_head =
	    STAILQ_HEAD_INITIALIZER(nx_monitor_dom_s.nxdom_prov_head),
	.nxdom_type = NEXUS_TYPE_MONITOR,
	.nxdom_md_type = NEXUS_META_TYPE_QUANTUM,
	.nxdom_md_subtype = NEXUS_META_SUBTYPE_PAYLOAD,
	.nxdom_name = "monitor",
	/*
	 * The following values don't really matter much, as a monitor
	 * isn't usable on its own; we just define them as non-zeroes.
	 */
	.nxdom_ports = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_tx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_rx_rings = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_tx_slots = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_rx_slots = {
		.nb_def = 1,
		.nb_min = 1,
		.nb_max = 1,
	},
	.nxdom_buf_size = {
		.nb_def = 64,
		.nb_min = 64,
		.nb_max = 64,
	},
	.nxdom_meta_size = {
		.nb_def = NX_METADATA_OBJ_MIN_SZ,
		.nb_min = NX_METADATA_OBJ_MIN_SZ,
		.nb_max = NX_METADATA_USR_MAX_SZ,
	},
	.nxdom_stats_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_STATS_MAX_SZ,
	},
	/* monitors cannot be used as pipe endpoints */
	.nxdom_pipes = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = 0,
	},
	.nxdom_flowadv_max = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_FLOWADV_MAX,
	},
	.nxdom_nexusadv_size = {
		.nb_def = 0,
		.nb_min = 0,
		.nb_max = NX_NEXUSADV_MAX_SZ,
	},
	.nxdom_capabilities = {
		.nb_def = NXPCAP_USER_CHANNEL,
		.nb_min = NXPCAP_USER_CHANNEL,
		.nb_max = NXPCAP_USER_CHANNEL,
	},
	.nxdom_qmap = {
		.nb_def = NEXUS_QMAP_TYPE_INVALID,
		.nb_min = NEXUS_QMAP_TYPE_INVALID,
		.nb_max = NEXUS_QMAP_TYPE_INVALID,
	},
	.nxdom_max_frags = {
		.nb_def = NX_PBUF_FRAGS_DEFAULT,
		.nb_min = NX_PBUF_FRAGS_MIN,
		.nb_max = NX_PBUF_FRAGS_DEFAULT,
	},
	/* lifecycle and port-management callbacks */
	.nxdom_init = nx_mon_dom_init,
	.nxdom_terminate = nx_mon_dom_terminate,
	.nxdom_fini = nx_mon_dom_fini,
	.nxdom_find_port = NULL,
	.nxdom_port_is_reserved = NULL,
	.nxdom_bind_port = nx_mon_dom_bind_port,
	.nxdom_unbind_port = nx_mon_dom_unbind_port,
	.nxdom_connect = nx_mon_dom_connect,
	.nxdom_disconnect = nx_mon_dom_disconnect,
	.nxdom_defunct = nx_mon_dom_defunct,
	.nxdom_defunct_finalize = nx_mon_dom_defunct_finalize,
};
277
/*
 * Built-in domain provider for the monitor nexus.  Only init/fini,
 * parameter validation and memory-arena creation callbacks are
 * implemented; all nexus ctor/dtor/config hooks are unused.
 */
static struct kern_nexus_domain_provider nx_monitor_prov_s = {
	.nxdom_prov_name = NEXUS_PROVIDER_MONITOR,
	.nxdom_prov_flags = NXDOMPROVF_DEFAULT,
	.nxdom_prov_cb = {
		.dp_cb_init = nx_mon_prov_init,
		.dp_cb_fini = nx_mon_prov_fini,
		.dp_cb_params = nx_mon_prov_params,
		.dp_cb_mem_new = nx_mon_prov_mem_new,
		.dp_cb_config = NULL,
		.dp_cb_nx_ctor = NULL,
		.dp_cb_nx_dtor = NULL,
		.dp_cb_nx_mem_info = NULL, /* not supported */
		.dp_cb_nx_mib_get = NULL,
	},
};
293
/* zone backing struct nexus_monitor_adapter allocations (see na_mon_alloc) */
static ZONE_DECLARE(na_mon_zone, SKMEM_ZONE_PREFIX ".na.mon",
    sizeof(struct nexus_monitor_adapter), ZC_ZFREE_CLEARMEM);

/* allocation tag for the per-kring monitors arrays (see nx_mon_kr_alloc) */
#define SKMEM_TAG_MONITORS "com.apple.skywalk.monitors"
static kern_allocation_name_t skmem_tag_monitors;
299
/*
 * Domain init callback: create the allocation tag used for the
 * per-kring monitor arrays and register the built-in monitor provider.
 * Called with the SK lock held, exactly once per domain.
 */
static void
nx_mon_dom_init(struct nxdom *nxdom)
{
	SK_LOCK_ASSERT_HELD();
	ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED));

	/* tag consumed by sk_realloc() in nx_mon_kr_alloc() */
	ASSERT(skmem_tag_monitors == NULL);
	skmem_tag_monitors =
	    kern_allocation_name_allocate(SKMEM_TAG_MONITORS, 0);
	ASSERT(skmem_tag_monitors != NULL);

	(void) nxdom_prov_add(nxdom, &nx_monitor_prov_s);
}
313
/*
 * Domain terminate callback: unregister every provider attached to the
 * monitor domain and release the allocation tag created at init time.
 */
static void
nx_mon_dom_terminate(struct nxdom *nxdom)
{
	struct kern_nexus_domain_provider *nxdom_prov, *tnxdp;

	/* _SAFE traversal: nxdom_prov_del() unlinks the current entry */
	STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head,
	    nxdom_prov_link, tnxdp) {
		(void) nxdom_prov_del(nxdom_prov);
	}

	if (skmem_tag_monitors != NULL) {
		kern_allocation_name_release(skmem_tag_monitors);
		skmem_tag_monitors = NULL;
	}
}
329
/* Domain fini callback: nothing to clean up for the monitor domain. */
static void
nx_mon_dom_fini(struct nxdom *nxdom)
{
#pragma unused(nxdom)
}
335
/*
 * Monitor nexus ports are never bound through the domain layer;
 * reaching this callback indicates a kernel bug, so panic.
 */
__attribute__((noreturn))
static int
nx_mon_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port,
    struct nxbind *nxb, void *info)
{
#pragma unused(nx, nx_port, nxb, info)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
346
/*
 * Counterpart of nx_mon_dom_bind_port(); must never be reached either.
 */
__attribute__((noreturn))
static int
nx_mon_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port)
{
#pragma unused(nx, nx_port)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
356
/*
 * Channels never connect to a monitor through the generic domain
 * connect path; reaching this callback is a bug, so panic.
 */
__attribute__((noreturn))
static int
nx_mon_dom_connect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, chr, ch0, nxb, p)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
368
/*
 * Counterpart of nx_mon_dom_connect(); must never be reached either.
 */
__attribute__((noreturn))
static void
nx_mon_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nxdom_prov, nx, ch)
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
379
/* Defunct callback: intentionally a no-op for monitor channels. */
static void
nx_mon_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, struct proc *p)
{
#pragma unused(nxdom_prov, nx, ch, p)
}
386
/* Defunct-finalize callback: intentionally a no-op for monitor channels. */
static void
nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov,
    struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked)
{
#pragma unused(nxdom_prov, nx, ch, locked)
}
393
/* Provider init callback: nothing to set up beyond a debug log. */
static int
nx_mon_prov_init(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("initializing %s", nxdom_prov->nxdom_prov_name);
	return 0;
}
401
/*
 * Per-domain adjustment hook handed to nxprov_params_adjust(); the
 * monitor domain accepts the generic adjustments as-is.
 */
static int
nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov,
    const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj)
{
#pragma unused(nxdom_prov, nxp, adj)

	return 0;
}
410
411 static int
nx_mon_prov_params(struct kern_nexus_domain_provider * nxdom_prov,const uint32_t req,const struct nxprov_params * nxp0,struct nxprov_params * nxp,struct skmem_region_params srp[SKMEM_REGIONS])412 nx_mon_prov_params(struct kern_nexus_domain_provider *nxdom_prov,
413 const uint32_t req, const struct nxprov_params *nxp0,
414 struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS])
415 {
416 struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom;
417
418 return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp,
419 nxdom, nxdom, nxdom, nx_mon_prov_params_adjust);
420 }
421
422 static int
nx_mon_prov_mem_new(struct kern_nexus_domain_provider * nxdom_prov,struct kern_nexus * nx,struct nexus_adapter * na)423 nx_mon_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov,
424 struct kern_nexus *nx, struct nexus_adapter *na)
425 {
426 #pragma unused(nxdom_prov)
427 int err = 0;
428
429 SK_DF(SK_VERB_MONITOR,
430 "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)", SK_KVA(nx),
431 NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name,
432 SK_KVA(na));
433
434 ASSERT(na->na_arena == NULL);
435 ASSERT(NX_USER_CHANNEL_PROV(nx));
436 /*
437 * The underlying nexus adapter uses the same memory allocator
438 * as the monitored adapter; don't store the pp in the nexus.
439 *
440 * This means that clients calling kern_nexus_get_pbufpool()
441 * will get NULL, but this is fine since we don't expose the
442 * monitor to external kernel clients.
443 */
444 na->na_arena = skmem_arena_create_for_nexus(na,
445 NX_PROV(nx)->nxprov_region_params, NULL, NULL, FALSE,
446 FALSE, NULL, &err);
447 ASSERT(na->na_arena != NULL || err != 0);
448
449 return err;
450 }
451
/* Provider fini callback: nothing to tear down beyond a debug log. */
static void
nx_mon_prov_fini(struct kern_nexus_domain_provider *nxdom_prov)
{
#pragma unused(nxdom_prov)
	SK_D("destroying %s", nxdom_prov->nxdom_prov_name);
}
458
459 static struct nexus_monitor_adapter *
na_mon_alloc(zalloc_flags_t how)460 na_mon_alloc(zalloc_flags_t how)
461 {
462 struct nexus_monitor_adapter *mna;
463
464 _CASSERT(offsetof(struct nexus_monitor_adapter, mna_up) == 0);
465
466 mna = zalloc_flags(na_mon_zone, how | Z_ZERO);
467 if (mna) {
468 mna->mna_up.na_type = NA_MONITOR;
469 mna->mna_up.na_free = na_mon_free;
470 }
471 return mna;
472 }
473
474 static void
na_mon_free(struct nexus_adapter * na)475 na_mon_free(struct nexus_adapter *na)
476 {
477 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
478
479 ASSERT(mna->mna_up.na_refcount == 0);
480 SK_DF(SK_VERB_MEM, "mna 0x%llx FREE", SK_KVA(mna));
481 bzero(mna, sizeof(*mna));
482 zfree(na_mon_zone, mna);
483 }
484
485 /*
486 * Functions common to both kind of monitors.
487 */
488
489 /*
490 * nm_sync callback for the monitor's own tx rings.
491 * This makes no sense and always returns error
492 */
static int
nx_mon_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
	SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);
	/* monitors never transmit; always fail the tx sync */
	return EIO;
}
505
506 /*
507 * nm_sync callback for the monitor's own rx rings.
508 * Note that the lock in nx_mon_zcopy_parent_sync only protects
509 * writers among themselves. Synchronization between writers
510 * (i.e., nx_mon_zcopy_parent_txsync and nx_mon_zcopy_parent_rxsync)
 * and readers (i.e., nx_mon_na_rxsync) relies on memory barriers.
512 */
static int
nx_mon_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(p, flags)
	SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);
	/*
	 * Publish the reader's head as the new kernel head, releasing
	 * consumed slots back to the writer (the intercepted parent
	 * sync); the barrier orders this store for the writer side.
	 */
	kring->ckr_khead = kring->ckr_rhead;
	membar_sync();
	return 0;
}
527
528 /*
529 * na_krings_create callbacks for monitors.
530 * We could use the default netmap_hw_krings_zmon, but
531 * we don't need the nx_mbq.
532 */
static int
nx_mon_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	ASSERT(na->na_type == NA_MONITOR);
	/* 0 tailroom, no mbq needed for monitor rings */
	return na_rings_mem_setup(na, 0, FALSE, ch);
}
539
540 /* na_krings_delete callback for monitors */
static void
nx_mon_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	ASSERT(na->na_type == NA_MONITOR);
	/* counterpart of the na_rings_mem_setup() done at krings_create */
	na_rings_mem_teardown(na, ch, defunct);
}
548
549 __attribute__((always_inline))
550 static inline uint32_t
nx_mon_txrx2chmode(enum txrx t)551 nx_mon_txrx2chmode(enum txrx t)
552 {
553 return t == NR_RX ? CHMODE_MONITOR_RX : CHMODE_MONITOR_TX;
554 }
555
556 /* allocate the monitors array in the monitored kring */
static int
nx_mon_kr_alloc(struct __kern_channel_ring *kring, uint32_t n)
{
	struct __kern_channel_ring **nm;
	size_t len, oldlen;

	if (n <= kring->ckr_max_monitors) {
		/* we already have more entries than requested */
		return 0;
	}

	/* grow the array to hold n monitor-ring pointers */
	oldlen = sizeof(struct __kern_channel_ring *) * kring->ckr_max_monitors;
	len = sizeof(struct __kern_channel_ring *) * n;
	nm = sk_realloc(kring->ckr_monitors, oldlen, len, Z_WAITOK, skmem_tag_monitors);
	if (nm == NULL) {
		return ENOMEM;
	}

	kring->ckr_monitors = nm;
	kring->ckr_max_monitors = n;

	return 0;
}
580
/* deallocate the monitors array in the monitored kring */
582 static void
nx_mon_kr_dealloc(struct __kern_channel_ring * kring)583 nx_mon_kr_dealloc(struct __kern_channel_ring *kring)
584 {
585 if (kring->ckr_monitors != NULL) {
586 if (kring->ckr_n_monitors > 0) {
587 SK_ERR("freeing not empty monitor array for \"%s\" "
588 "(%u dangling monitors)!", kring->ckr_name,
589 kring->ckr_n_monitors);
590 }
591 sk_free(kring->ckr_monitors,
592 sizeof(struct __kern_channel_ring *) * kring->ckr_max_monitors);
593 kring->ckr_monitors = NULL;
594 kring->ckr_max_monitors = 0;
595 kring->ckr_n_monitors = 0;
596 }
597 }
598
/*
 * Stop and lock the monitored (parent) adapter's krings covered by this
 * monitor's mode and ring range.  On return qfirst/qlast hold, per
 * direction, the range actually acquired; the caller must undo with
 * nx_mon_na_krings_unlock() passing the same arrays.
 */
static int
nx_mon_na_krings_locks(struct nexus_adapter *na,
    uint32_t qfirst[NR_TXRX], uint32_t qlast[NR_TXRX])
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	enum txrx t;
	int err = 0;

	for_rx_tx(t) {
		uint32_t i;

		/* skip directions this monitor does not intercept */
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		qfirst[t] = qlast[t] = mna->mna_first[t];

		/* synchronize with concurrently running nm_sync()s */
		for (i = mna->mna_first[t]; i < mna->mna_last[t]; i++) {
			struct __kern_channel_ring *kring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			kr_stop(kring, KR_LOCKED);
			qlast[t] = i + 1;
		}
		/*
		 * NOTE(review): err is never set inside this loop, so
		 * this check is currently dead and the function always
		 * returns 0; callers nonetheless treat it as fallible.
		 */
		if (err != 0) {
			break;
		}
	}

	return err;
}
633
634 static void
nx_mon_na_krings_unlock(struct nexus_adapter * na,const uint32_t qfirst[NR_TXRX],const uint32_t qlast[NR_TXRX])635 nx_mon_na_krings_unlock(struct nexus_adapter *na,
636 const uint32_t qfirst[NR_TXRX], const uint32_t qlast[NR_TXRX])
637 {
638 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
639 struct nexus_adapter *pna = mna->mna_pna;
640 enum txrx t;
641
642 for_rx_tx(t) {
643 uint32_t i;
644
645 if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
646 continue;
647 }
648
649 /* synchronize with concurrently running nm_sync()s */
650 for (i = qfirst[t]; i < qlast[t]; i++) {
651 struct __kern_channel_ring *kring;
652
653 /* the parent adapter's kring */
654 kring = &NAKR(pna, t)[i];
655 kr_start(kring);
656 }
657 }
658 }
659
/*
 * Activate monitoring: acquire the parent's krings, select the packet
 * copy routine from the parent's pool layout, and hook this monitor's
 * rx rings into each monitored kring.  On partial failure, everything
 * added so far is detached before returning the error.
 */
static int
nx_mon_enable(struct nexus_adapter *na, boolean_t zcopy)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	struct skmem_arena_nexus *na_arena = skmem_arena_nexus(pna->na_arena);
	uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
	enum txrx t;
	int err = 0;
	uint32_t i;

	ASSERT(!(na->na_flags & NAF_ACTIVE));

	bzero(&qfirst, sizeof(qfirst));
	bzero(&qlast, sizeof(qlast));

	/*
	 * Acquire the target kring(s).  q{first,last} are filled in by
	 * nx_mon_na_krings_locks() with the per-direction ranges that
	 * were successfully acquired; the unlock at the end releases
	 * exactly those, including on the failure paths.
	 */
	if ((err = nx_mon_na_krings_locks(na, qfirst, qlast)) != 0) {
		goto unlock;
	}

	/* pick the copy routine matching the parent pool's metadata */
	ASSERT(na_arena->arn_rx_pp == na_arena->arn_tx_pp);
	if (na_arena->arn_rx_pp->pp_max_frags > 1) {
		VERIFY(na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET);
		mna->mna_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt;
	} else {
		if (na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET) {
			mna->mna_pkt_copy_from_pkt = pkt_copy_from_pkt;
		} else {
			mna->mna_pkt_copy_from_pkt = nx_mon_quantum_copy_64x;
		}
	}

	/* attach our rx rings to each acquired parent kring */
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			err = nx_mon_add(mkring, kring, zcopy);
			if (err != 0) {
				break;
			}
		}
		if (err != 0) {
			break;
		}
	}

	if (err == 0) {
		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
		goto unlock;
	}

	/* failed mid-way: detach everything added so far */
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			nx_mon_del(mkring, kring, FALSE);
		}
	}
	ASSERT(!(na->na_flags & NAF_ACTIVE));

unlock:
	nx_mon_na_krings_unlock(na, qfirst, qlast);

	SK_DF(err ? SK_VERB_ERROR : SK_VERB_MONITOR,
	    "%s (0x%llx): mode 0x%x txrings[%u,%u], rxrings[%u,%u] err %d",
	    na->na_name, SK_KVA(na), mna->mna_mode, qfirst[NR_TX], qlast[NR_TX],
	    qfirst[NR_RX], qlast[NR_RX], err);

	return err;
}
750
/*
 * Deactivate monitoring: detach this monitor from every monitored
 * parent kring, clear the copy routine, and drop NAF_ACTIVE.
 */
static void
nx_mon_disable(struct nexus_adapter *na)
{
	struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
	struct nexus_adapter *pna = mna->mna_pna;
	uint32_t qfirst[NR_TXRX], qlast[NR_TXRX];
	enum txrx t;
	int err;
	uint32_t i;

	ASSERT(na->na_flags & NAF_ACTIVE);

	bzero(&qfirst, sizeof(qfirst));
	bzero(&qlast, sizeof(qlast));

	/* blocking kring(s) acquisition; must not fail */
	err = nx_mon_na_krings_locks(na, qfirst, qlast);
	ASSERT(err == 0);
	mna->mna_pkt_copy_from_pkt = NULL;
	for_rx_tx(t) {
		if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) {
			continue;
		}

		for (i = qfirst[t]; i < qlast[t]; i++) {
			struct __kern_channel_ring *kring, *mkring;

			/* unhook from the parent adapter's kring */
			kring = &NAKR(pna, t)[i];
			mkring = &na->na_rx_rings[i];
			nx_mon_del(mkring, kring, FALSE);
		}
	}
	atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);

	nx_mon_na_krings_unlock(na, qfirst, qlast);
}
787
788 /*
789 * Add the monitor mkring to the list of monitors of kring.
790 * If this is the first monitor, intercept the callbacks
791 */
static int
nx_mon_add(struct __kern_channel_ring *mkring,
    struct __kern_channel_ring *kring, boolean_t zcopy)
{
	int error;

	/* make sure the monitor array exists and is big enough */
	error = nx_mon_kr_alloc(kring, kring->ckr_n_monitors + 1);
	if (error != 0) {
		return error;
	}

	/* append the monitor and record its slot for nx_mon_del() */
	kring->ckr_monitors[kring->ckr_n_monitors] = mkring;
	mkring->ckr_mon_pos = kring->ckr_n_monitors;
	kring->ckr_n_monitors++;
	if (kring->ckr_n_monitors == 1) {
		/* this is the first monitor, intercept callbacks */
		SK_DF(SK_VERB_MONITOR,
		    "mkr \"%s\" (0x%llx) krflags 0x%b intercept callbacks "
		    "on kr \"%s\" (0x%llx) krflags 0x%b", mkring->ckr_name,
		    SK_KVA(mkring), mkring->ckr_flags, CKRF_BITS,
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS);
		/* save the original sync callback before overriding */
		kring->ckr_mon_sync = kring->ckr_na_sync;
		/*
		 * zcopy monitors do not override nm_notify(), but
		 * we save the original one regardless, so that
		 * nx_mon_del() does not need to know the
		 * monitor type
		 */
		kring->ckr_mon_notify = kring->ckr_na_notify;
		if (kring->ckr_tx == NR_TX) {
			kring->ckr_na_sync =
			    (zcopy ? nx_mon_zcopy_parent_txsync :
			    nx_mon_parent_txsync);
		} else {
			kring->ckr_na_sync =
			    (zcopy ? nx_mon_zcopy_parent_rxsync :
			    nx_mon_parent_rxsync);
			if (!zcopy) {
				/* also intercept notify */
				kring->ckr_na_notify = nx_mon_parent_notify;
				kring->ckr_mon_tail = kring->ckr_ktail;
			}
		}
	} else {
		SK_DF(SK_VERB_MONITOR,
		    "mkr \"%s\" (0x%llx) krflags 0x%b already intercept "
		    "callbacks on kr \"%s\" (0x%llx) krflags 0x%b, "
		    "%u monitors", mkring->ckr_name, SK_KVA(mkring),
		    mkring->ckr_flags, CKRF_BITS, kring->ckr_name,
		    SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
		    kring->ckr_n_monitors);
	}
	return 0;
}
848
849 /*
850 * Remove the monitor mkring from the list of monitors of kring.
851 * If this is the last monitor, restore the original callbacks
852 */
static void
nx_mon_del(struct __kern_channel_ring *mkring,
    struct __kern_channel_ring *kring, boolean_t all)
{
	ASSERT(kring->ckr_n_monitors != 0);
	if (all) {
		/* wholesale detach; mkring may be NULL on this path */
		kring->ckr_n_monitors = 0;
	} else {
		/* swap-remove: move the last monitor into the hole */
		kring->ckr_n_monitors--;
		if (mkring->ckr_mon_pos != kring->ckr_n_monitors) {
			kring->ckr_monitors[mkring->ckr_mon_pos] =
			    kring->ckr_monitors[kring->ckr_n_monitors];
			kring->ckr_monitors[mkring->ckr_mon_pos]->ckr_mon_pos =
			    mkring->ckr_mon_pos;
		}
		kring->ckr_monitors[kring->ckr_n_monitors] = NULL;
	}
	if (kring->ckr_n_monitors == 0) {
		/*
		 * This was the last monitor, restore callbacks
		 * and delete monitor array.
		 */
		SK_DF(SK_VERB_MONITOR,
		    "restoring sync callback on kr \"%s\" (0x%llx) "
		    "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
		    kring->ckr_flags, CKRF_BITS);
		kring->ckr_na_sync = kring->ckr_mon_sync;
		kring->ckr_mon_sync = NULL;
		if (kring->ckr_tx == NR_RX) {
			/* notify was only intercepted on rx rings */
			SK_DF(SK_VERB_MONITOR,
			    "restoring notify callback on kr \"%s\" (0x%llx) "
			    "krflags 0x%b", kring->ckr_name, SK_KVA(kring),
			    kring->ckr_flags, CKRF_BITS);
			kring->ckr_na_notify = kring->ckr_mon_notify;
			kring->ckr_mon_notify = NULL;
		}
		nx_mon_kr_dealloc(kring);
	} else {
		SK_DF(SK_VERB_MONITOR,
		    "NOT restoring callbacks on kr \"%s\" (0x%llx) "
		    "krflags 0x%b, %u monitors left", kring->ckr_name,
		    SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
		    kring->ckr_n_monitors);
	}
}
898
899 /*
900 * This is called when the monitored adapter leaves skywalk mode (see
901 * na_unbind_channel). We need to notify the monitors that the monitored
902 * rings are gone. We do this by setting their mna->mna_pna to NULL.
903 * Note that the rings must be stopped when this happens, so no monitor
904 * ring callback can be active.
905 */
906 void
nx_mon_stop(struct nexus_adapter * na)907 nx_mon_stop(struct nexus_adapter *na)
908 {
909 enum txrx t;
910
911 SK_LOCK_ASSERT_HELD();
912
913 /* skip if this adapter has no allocated rings */
914 if (na->na_tx_rings == NULL) {
915 return;
916 }
917
918 na_disable_all_rings(na);
919
920 for_rx_tx(t) {
921 uint32_t i;
922
923 for (i = 0; i < na_get_nrings(na, t); i++) {
924 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
925 uint32_t j;
926
927 for (j = 0; j < kring->ckr_n_monitors; j++) {
928 struct __kern_channel_ring *mkring =
929 kring->ckr_monitors[j];
930 struct nexus_monitor_adapter *mna =
931 (struct nexus_monitor_adapter *)
932 KRNA(mkring);
933
934 /* forget about this adapter */
935 if (mna->mna_pna != NULL) {
936 ASSERT(na == mna->mna_pna);
937 (void) na_release_locked(mna->mna_pna);
938 mna->mna_pna = NULL;
939 }
940 }
941
942 /*
943 * Remove all monitors and restore callbacks;
944 * this is important for nexus adapters that
945 * are linked to one another, e.g. pipe, since
946 * the callback changes on one adapter affects
947 * its peer during sync times.
948 */
949 if (kring->ckr_n_monitors > 0) {
950 nx_mon_del(NULL, kring, TRUE);
951 }
952
953 ASSERT(kring->ckr_monitors == NULL);
954 ASSERT(kring->ckr_max_monitors == 0);
955 ASSERT(kring->ckr_n_monitors == 0);
956 }
957 }
958
959 na_enable_all_rings(na);
960 }
961
962 /*
963 * Common functions for the na_activate() callbacks of both kind of
964 * monitors.
965 */
966 static int
nx_mon_na_activate_common(struct nexus_adapter * na,na_activate_mode_t mode,boolean_t zcopy)967 nx_mon_na_activate_common(struct nexus_adapter *na, na_activate_mode_t mode,
968 boolean_t zcopy)
969 {
970 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
971 struct nexus_adapter *pna = mna->mna_pna;
972 int err = 0;
973
974 ASSERT(na->na_type == NA_MONITOR);
975
976 SK_DF(SK_VERB_MONITOR, "na \"%s\" (0x%llx) %s zcopy %u", na->na_name,
977 SK_KVA(na), na_activate_mode2str(mode), zcopy);
978
979 switch (mode) {
980 case NA_ACTIVATE_MODE_ON:
981 if (pna == NULL) {
982 /* parent left skywalk mode, fatal */
983 SK_ERR("%s: internal error", na->na_name);
984 err = ENXIO;
985 } else {
986 err = nx_mon_enable(na, zcopy);
987 }
988 break;
989
990 case NA_ACTIVATE_MODE_DEFUNCT:
991 break;
992
993 case NA_ACTIVATE_MODE_OFF:
994 if (pna == NULL) {
995 SK_DF(SK_VERB_MONITOR, "%s: parent left skywalk mode, "
996 "nothing to restore", na->na_name);
997 } else {
998 nx_mon_disable(na);
999 }
1000 break;
1001
1002 default:
1003 VERIFY(0);
1004 /* NOTREACHED */
1005 __builtin_unreachable();
1006 }
1007
1008 return err;
1009 }
1010
1011 /*
1012 * Functions specific for zero-copy monitors.
1013 */
1014
1015 /*
1016 * Common function for both zero-copy tx and rx nm_sync()
1017 * callbacks
1018 */
static int
nx_mon_zcopy_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags, enum txrx tx)
{
	/* zero-copy monitors are exclusive, so index 0 is the only monitor */
	struct __kern_channel_ring *mkring = kring->ckr_monitors[0];
	int rel_slots, free_slots, busy, sent = 0;
	slot_idx_t beg, end, i;
	const slot_idx_t lim = kring->ckr_lim;
	const slot_idx_t mlim;	/* monitor ring limit; written once below */
	int error = 0;

	if (mkring == NULL) {
		/* monitor is gone; nothing to forward */
		SK_RD(5, "NULL monitor on kr \"%s\" (0x%llx) krflags 0x%b",
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS);
		return 0;
	}

	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!KR_KERNEL_ONLY(mkring));

	/* deconst */
	*(slot_idx_t *)(uintptr_t)&mlim = mkring->ckr_lim;

	/* get the released slots (rel_slots) as the range [beg, end) */
	if (tx == NR_TX) {
		/*
		 * For tx we must run the parent's original txsync first;
		 * slots it releases show up as growth of ckr_ktail.
		 */
		beg = kring->ckr_ktail;
		error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
		if (error) {
			return error;
		}
		end = kring->ckr_ktail;
	} else { /* NR_RX */
		/* for rx, the user has released [ckr_khead, ckr_rhead) */
		beg = kring->ckr_khead;
		end = kring->ckr_rhead;
	}

	rel_slots = end - beg;
	if (rel_slots < 0) {
		/* index wrapped around the ring */
		rel_slots += kring->ckr_num_slots;
	}

	if (!rel_slots) {
		/*
		 * No released slots, but we still need
		 * to call rxsync if this is a rx ring
		 */
		goto out_rxsync;
	}

	/*
	 * We need to lock the monitor receive ring, since it
	 * is the target of both tx and rx traffic from the monitored
	 * adapter
	 */
	KR_LOCK(mkring);
	/* get the free slots available on the monitor ring */
	i = mkring->ckr_ktail;
	busy = i - mkring->ckr_khead;
	if (busy < 0) {
		busy += mkring->ckr_num_slots;
	}
	free_slots = mlim - busy;

	if (!free_slots) {
		goto out;
	}

	/* swap min(free_slots, rel_slots) slots */
	if (free_slots < rel_slots) {
		/* not enough room: the oldest released slots are dropped */
		beg += (rel_slots - free_slots);
		if (beg >= kring->ckr_num_slots) {
			beg -= kring->ckr_num_slots;
		}
		rel_slots = free_slots;
	}

	sent = rel_slots;
	for (; rel_slots; rel_slots--) {
		/*
		 * Swap the slots.
		 *
		 * XXX: [email protected] -- this bypasses the slot attach/detach
		 * interface, and needs to be changed when monitor adopts the
		 * packet APIs. SD_SWAP() will perform a block copy of the
		 * swap, and will readjust the kernel slot descriptor's sd_user
		 * accordingly.
		 */
		SD_SWAP(KR_KSD(mkring, i), KR_USD(mkring, i),
		    KR_KSD(kring, beg), KR_USD(kring, beg));

		SK_RD(5, "beg %u buf_idx %u", beg,
		    METADATA_IDX(KR_KSD(kring, beg)->sd_qum));

		beg = SLOT_NEXT(beg, lim);
		i = SLOT_NEXT(i, mlim);
	}
	/* make the swapped slots visible before publishing the new tail */
	membar_sync();
	mkring->ckr_ktail = i;

out:
	KR_UNLOCK(mkring);

	if (sent) {
		/* notify the new frames to the monitor */
		(void) mkring->ckr_na_notify(mkring, p, 0);
	}

out_rxsync:
	if (tx == NR_RX) {
		/* for rx, the parent's original rxsync runs after the swap */
		error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
	}

	return error;
}
1134
1135 /*
1136 * Callback used to replace the ckr_na_sync callback in the monitored tx rings.
1137 */
1138 static int
nx_mon_zcopy_parent_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1139 nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
1140 uint32_t flags)
1141 {
1142 SK_DF(SK_VERB_MONITOR,
1143 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1144 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1145 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1146 return nx_mon_zcopy_parent_sync(kring, p, flags, NR_TX);
1147 }
1148
1149 /* callback used to replace the nm_sync callback in the monitored rx rings */
1150 static int
nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1151 nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1152 uint32_t flags)
1153 {
1154 SK_DF(SK_VERB_MONITOR,
1155 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x",
1156 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
1157 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1158 return nx_mon_zcopy_parent_sync(kring, p, flags, NR_RX);
1159 }
1160
1161 static int
nx_mon_zcopy_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1162 nx_mon_zcopy_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1163 {
1164 return nx_mon_na_activate_common(na, mode, TRUE /* zcopy */);
1165 }
1166
1167 /* na_dtor callback for monitors */
1168 static void
nx_mon_zcopy_na_dtor(struct nexus_adapter * na)1169 nx_mon_zcopy_na_dtor(struct nexus_adapter *na)
1170 {
1171 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1172 struct nexus_adapter *pna = mna->mna_pna;
1173
1174 SK_LOCK_ASSERT_HELD();
1175 ASSERT(na->na_type == NA_MONITOR);
1176
1177 if (pna != NULL) {
1178 (void) na_release_locked(pna);
1179 mna->mna_pna = NULL;
1180 }
1181 }
1182
1183 /*
1184 * Functions specific for copy monitors.
1185 */
1186
static void
nx_mon_parent_sync(struct __kern_channel_ring *kring, struct proc *p,
    slot_idx_t first_new, int new_slots)
{
	nexus_meta_type_t md_type = KRNA(kring)->na_md_type;
	uint32_t j;

	/* fan the new slots out to every monitor attached to this ring */
	for (j = 0; j < kring->ckr_n_monitors; j++) {
		struct __kern_channel_ring *mkring = kring->ckr_monitors[j];
		slot_idx_t i, mlim, beg;
		int free_slots, busy, sent = 0, m;
		const slot_idx_t lim = kring->ckr_lim;
		struct nexus_adapter *dst_na = KRNA(mkring);
		struct nexus_monitor_adapter *mna =
		    (struct nexus_monitor_adapter *)dst_na;
		/* largest payload a single monitor slot can hold */
		uint32_t max_len = mkring->ckr_pp->pp_max_frags *
		    mkring->ckr_pp->pp_buflet_size;

		/*
		 * src and dst adapters must share the same nexus;
		 * this test is done in nx_monitor_na_find(). This
		 * covers both buffer and metadata sizes.
		 */

		mlim = mkring->ckr_lim;

		/*
		 * We need to lock the monitor receive ring, since it
		 * is the target of both tx and rx traffic from the
		 * monitored adapter.
		 */
		KR_LOCK(mkring);
		/* get the free slots available on the monitor ring */
		i = mkring->ckr_ktail;
		busy = i - mkring->ckr_khead;
		if (busy < 0) {
			busy += mkring->ckr_num_slots;
		}
		free_slots = mlim - busy;

		if (!free_slots) {
			goto out;
		}

		/* copy min(free_slots, new_slots) slots */
		m = new_slots;
		beg = first_new;
		if (free_slots < m) {
			/* not enough room: skip the oldest new slots */
			beg += (m - free_slots);
			if (beg >= kring->ckr_num_slots) {
				beg -= kring->ckr_num_slots;
			}
			m = free_slots;
		}

		ASSERT(KRNA(mkring)->na_md_type == md_type);

		for (; m; m--) {
			struct __kern_slot_desc *src_sd = KR_KSD(kring, beg);
			struct __kern_slot_desc *dst_sd = KR_KSD(mkring, i);
			struct __kern_packet *spkt, *dpkt;
			kern_packet_t sph, dph;
			uint32_t copy_len;

			if (!KSD_VALID_METADATA(src_sd)) {
				/* source slot has no packet attached */
				goto skip;
			}

			/* retrieve packet handles from slot */
			spkt = src_sd->sd_pkt;
			sph = SK_PTR_ENCODE(spkt, METADATA_TYPE(spkt),
			    METADATA_SUBTYPE(spkt));
			dpkt = dst_sd->sd_pkt;
			dph = SK_PTR_ENCODE(dpkt, METADATA_TYPE(dpkt),
			    METADATA_SUBTYPE(dpkt));

			ASSERT(METADATA_TYPE(spkt) == METADATA_TYPE(dpkt));

			/* guard the round-up below against overflow */
			ASSERT(spkt->pkt_qum.qum_len <= (UINT32_MAX - 63));
			copy_len = spkt->pkt_qum.qum_len;

			/* round to a multiple of 64 */
			copy_len = (copy_len + 63) & ~63;

			if (__improbable(copy_len > max_len)) {
				/* payload exceeds monitor slot; truncate */
				SK_RD(5, "kr \"%s\" -> mkr \"%s\": "
				    "truncating %u to %u",
				    kring->ckr_name, mkring->ckr_name,
				    (uint32_t)copy_len, max_len);
				copy_len = max_len;
			}

			/* copy buffers */
			mna->mna_pkt_copy_from_pkt(kring->ckr_tx, dph, 0, sph,
			    0, copy_len, FALSE, 0, 0, FALSE);

			/* copy the associated meta data */
			_QUM_COPY(&(spkt)->pkt_qum, &(dpkt)->pkt_qum);
			if (md_type == NEXUS_META_TYPE_PACKET) {
				_PKT_COPY(spkt, dpkt);
				ASSERT(dpkt->pkt_mbuf == NULL);
			}

			ASSERT(!(dpkt->pkt_qum.qum_qflags & QUM_F_KERNEL_ONLY) ||
			    PP_KERNEL_ONLY(dpkt->pkt_qum.qum_pp));

			sent++;
			i = SLOT_NEXT(i, mlim);
skip:
			beg = SLOT_NEXT(beg, lim);
		}
		/* make the copies visible before publishing the new tail */
		membar_sync();
		mkring->ckr_ktail = i;
out:
		KR_UNLOCK(mkring);

		if (sent) {
			/* notify the new frames to the monitor */
			(void) mkring->ckr_na_notify(mkring, p, 0);
		}
	}
}
1309
1310 /* callback used to replace the nm_sync callback in the monitored tx rings */
1311 static int
nx_mon_parent_txsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1312 nx_mon_parent_txsync(struct __kern_channel_ring *kring, struct proc *p,
1313 uint32_t flags)
1314 {
1315 slot_idx_t first_new;
1316 int new_slots;
1317 nexus_type_t nx_type =
1318 kring->ckr_na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
1319
1320 /*
1321 * For user pipe nexus, txsync can also be initated from RX process
1322 * context, hence user pipe tx ring should be accessed holding
1323 * ckr_qlock.
1324 */
1325 if (nx_type == NEXUS_TYPE_USER_PIPE) {
1326 KR_LOCK(kring);
1327 }
1328
1329 /* get the new slots */
1330 first_new = kring->ckr_khead;
1331 new_slots = kring->ckr_rhead - first_new;
1332 if (new_slots < 0) {
1333 new_slots += kring->ckr_num_slots;
1334 }
1335 if (new_slots) {
1336 nx_mon_parent_sync(kring, p, first_new, new_slots);
1337 }
1338
1339 if (nx_type == NEXUS_TYPE_USER_PIPE) {
1340 KR_UNLOCK(kring);
1341 }
1342
1343 return kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1344 }
1345
1346 /* callback used to replace the nm_sync callback in the monitored rx rings */
1347 static int
nx_mon_parent_rxsync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1348 nx_mon_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1349 uint32_t flags)
1350 {
1351 slot_idx_t first_new;
1352 int new_slots, error;
1353
1354 /* get the new slots */
1355 error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags);
1356 if (error) {
1357 return error;
1358 }
1359 first_new = kring->ckr_mon_tail;
1360 new_slots = kring->ckr_ktail - first_new;
1361 if (new_slots < 0) {
1362 new_slots += kring->ckr_num_slots;
1363 }
1364 if (new_slots) {
1365 nx_mon_parent_sync(kring, p, first_new, new_slots);
1366 }
1367 kring->ckr_mon_tail = kring->ckr_ktail;
1368 return 0;
1369 }
1370
1371 /*
1372 * Callback used to replace the nm_notify() callback in the monitored rx rings
1373 */
1374 static int
nx_mon_parent_notify(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1375 nx_mon_parent_notify(struct __kern_channel_ring *kring, struct proc *p,
1376 uint32_t flags)
1377 {
1378 int err = 0;
1379 sk_protect_t protect = NULL;
1380
1381 SK_DF(SK_VERB_MONITOR | SK_VERB_NOTIFY |
1382 ((kring->ckr_tx == NR_TX) ? SK_VERB_TX : SK_VERB_RX),
1383 "kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x", kring->ckr_name,
1384 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags);
1385 /*
1386 * ?xsync callbacks have tryget called by their callers,
1387 * but here we have to call it by ourself. If we can't
1388 * acquire the exclusive sync right, skip the sync.
1389 */
1390 if ((err = kr_enter(kring, FALSE)) == 0) {
1391 protect = sk_sync_protect();
1392 nx_mon_parent_rxsync(kring, p, NA_SYNCF_FORCE_READ);
1393 sk_sync_unprotect(protect);
1394 kr_exit(kring);
1395 }
1396 /* in all cases (even error), we must invoke notify */
1397 kring->ckr_mon_notify(kring, p, (NA_NOTEF_MONITOR | flags));
1398 return err;
1399 }
1400
1401 static int
nx_mon_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)1402 nx_mon_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
1403 {
1404 return nx_mon_na_activate_common(na, mode, FALSE /* no zcopy */);
1405 }
1406
1407 static void
nx_mon_na_dtor(struct nexus_adapter * na)1408 nx_mon_na_dtor(struct nexus_adapter *na)
1409 {
1410 struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na;
1411 struct nexus_adapter *pna = mna->mna_pna;
1412
1413 SK_LOCK_ASSERT_HELD();
1414 ASSERT(na->na_type == NA_MONITOR);
1415
1416 if (pna != NULL) {
1417 (void) na_release_locked(pna);
1418 mna->mna_pna = NULL;
1419 }
1420 }
1421
1422 /* check if chr is a request for a monitor adapter that we can satisfy */
int
nx_monitor_na_find(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct kern_channel *ch0, struct nxbind *nxb,
    struct proc *p, struct nexus_adapter **na, boolean_t create)
{
#pragma unused(ch)
	boolean_t zcopy = !!(chr->cr_mode & CHMODE_MONITOR_NO_COPY);
	struct nexus_adapter *pna = NULL; /* parent adapter */
	struct nexus_monitor_adapter *mna = NULL;
	char monsuff[10] = "";
	struct chreq pchr;
	uint32_t i;
	int error;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();
	*na = NULL;

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u "
	    "ring_id %d ring_set %u ep_type %u:%u ch0 0x%llx create %u%s",
	    chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr),
	    (int)chr->cr_port, chr->cr_mode, CHMODE_BITS,
	    chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set,
	    chr->cr_real_endpoint, chr->cr_endpoint, SK_KVA(ch0), create,
	    !(chr->cr_mode & CHMODE_MONITOR) ? " (skipped)" : "");
#endif /* SK_LOG */

	if (!(chr->cr_mode & CHMODE_MONITOR)) {
		/* not a monitor request: not an error, just not for us */
		return 0;
	}

	/* XXX: Don't allow user packet pool mode in monitor for now */
	if (chr->cr_mode & CHMODE_USER_PACKET_POOL) {
		SK_ERR("User Packet pool mode not supported for monitor");
		return ENOTSUP;
	}

	mna = na_mon_alloc(Z_WAITOK);

	ASSERT(mna->mna_up.na_type == NA_MONITOR);
	ASSERT(mna->mna_up.na_free == na_mon_free);

	/* override the ring set since we're monitoring */
	chr->cr_ring_set = RING_SET_ALL;

	if (ch0 != NULL) {
		/*
		 * We've been given the owning channel from ch_open();
		 * use this as shortcut since otherwise we'd have to
		 * find it ourselves.
		 */
#if (DEBUG || DEVELOPMENT)
		ASSERT(!(ch0->ch_info->cinfo_ch_mode & CHMODE_MONITOR));
		ASSERT(ch0->ch_info->cinfo_nx_port == chr->cr_port);
#endif /* DEBUG || DEVELOPMENT */
		pna = ch0->ch_na;
		na_retain_locked(pna);
	} else {
		/*
		 * First, try to find the adapter that we want to monitor
		 * We use the same chr, after we have turned off the monitor
		 * flags. In this way we can potentially monitor everything
		 * skywalk understands, except other monitors.
		 */
		memcpy(&pchr, chr, sizeof(pchr));
		pchr.cr_mode &= ~CHMODE_MONITOR;
		error = na_find(ch, nx, &pchr, ch0, nxb, p, &pna, create);
		if (error != 0) {
			SK_ERR("parent lookup failed: %d", error);
			return error;
		}
	}
	ASSERT(pna != NULL);
	SK_DF(SK_VERB_MONITOR,
	    "found parent: \"%s\" (0x%llx)", pna->na_name, SK_KVA(pna));

	if (!NA_IS_ACTIVE(pna)) {
		/* parent not in skywalk mode */
		/*
		 * XXX we can wait for the parent to enter skywalk mode,
		 * by intercepting its na_activate() callback (2014-03-16)
		 */
		SK_ERR("parent \"%s\" (0x%llx) not in skywalk mode",
		    pna->na_name, SK_KVA(pna));
		error = ENXIO;
		goto put_out;
	} else if (zcopy && NA_KERNEL_ONLY(pna)) {
		/*
		 * Zero-copy mode requires the parent adapter to be
		 * created in a non-kernel-only mode.
		 */
		SK_ERR("parent \"%s\" (0x%llx) is in kernel-only mode",
		    pna->na_name, SK_KVA(pna));
		error = ENODEV;
		goto put_out;
	}

	/* grab all the rings we need in the parent */
	mna->mna_pna = pna;
	error = na_interp_ringid(pna, chr->cr_ring_id, chr->cr_ring_set,
	    mna->mna_first, mna->mna_last);
	if (error != 0) {
		SK_ERR("ring_mode %u ring_id %d error %d", chr->cr_ring_set,
		    (int)chr->cr_ring_id, error);
		goto put_out;
	}
	/* single-ring monitor: encode the ring index into the name */
	if (mna->mna_last[NR_TX] - mna->mna_first[NR_TX] == 1) {
		(void) snprintf(monsuff, 10, "-%u", mna->mna_first[NR_TX]);
	}
	/*
	 * NOTE(review): MONITOR_TX maps to "r" and MONITOR_RX to "t" in
	 * the suffix below -- presumably intentional (the parent's tx
	 * traffic lands on the monitor's rx side); confirm before changing.
	 */
	(void) snprintf(mna->mna_up.na_name, sizeof(mna->mna_up.na_name),
	    "%s%s/%s%s%s", pna->na_name, monsuff, zcopy ? "z" : "",
	    (chr->cr_mode & CHMODE_MONITOR_TX) ? "r" : "",
	    (chr->cr_mode & CHMODE_MONITOR_RX) ? "t" : "");
	uuid_generate_random(mna->mna_up.na_uuid);

	/* these don't apply to the monitor adapter */
	*(nexus_stats_type_t *)(uintptr_t)&mna->mna_up.na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;
	*(uint32_t *)(uintptr_t)&mna->mna_up.na_flowadv_max = 0;

	if (zcopy) {
		/*
		 * Zero copy monitors need exclusive access
		 * to the monitored rings.
		 */
		for_rx_tx(t) {
			if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
				continue;
			}
			for (i = mna->mna_first[t];
			    i < mna->mna_last[t]; i++) {
				struct __kern_channel_ring *kring =
				    &NAKR(pna, t)[i];
				if (kring->ckr_n_monitors > 0) {
					error = EBUSY;
					SK_ERR("kr \"%s\" already monitored "
					    "by \"%s\"", kring->ckr_name,
					    kring->ckr_monitors[0]->ckr_name);
					goto put_out;
				}
			}
		}
		mna->mna_up.na_activate = nx_mon_zcopy_na_activate;
		mna->mna_up.na_dtor = nx_mon_zcopy_na_dtor;
		/*
		 * To have zero copy, we need to use the same memory allocator
		 * as the monitored port.
		 */
		mna->mna_up.na_arena = pna->na_arena;
		skmem_arena_retain((&mna->mna_up)->na_arena);
		atomic_bitset_32(&mna->mna_up.na_flags, NAF_MEM_LOANED);
	} else {
		/* normal monitors are incompatible with zero copy ones */
		for_rx_tx(t) {
			if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) {
				continue;
			}
			for (i = mna->mna_first[t];
			    i < mna->mna_last[t]; i++) {
				struct __kern_channel_ring *kring =
				    &NAKR(pna, t)[i];
				if (kring->ckr_n_monitors > 0 &&
				    KRNA(kring->ckr_monitors[0])->
				    na_activate == nx_mon_zcopy_na_activate) {
					error = EBUSY;
					SK_ERR("kr \"%s\" is busy (zcopy)",
					    kring->ckr_name);
					goto put_out;
				}
			}
		}
		mna->mna_up.na_activate = nx_mon_na_activate;
		mna->mna_up.na_dtor = nx_mon_na_dtor;
		/*
		 * allocate a new (private) allocator instance using the
		 * parent nexus configuration.
		 */
		if ((error = nx_monitor_prov_s.nxdom_prov_mem_new(
			    NX_DOM_PROV(nx), nx, &mna->mna_up)) != 0) {
			ASSERT(mna->mna_up.na_arena == NULL);
			goto put_out;
		}
		ASSERT(mna->mna_up.na_arena != NULL);
		/*
		 * na_rxsync is unconditionally overwritten again below;
		 * this assignment is redundant but harmless.
		 */
		mna->mna_up.na_rxsync = nx_mon_na_rxsync;
	}
	*(nexus_meta_type_t *)(uintptr_t)&mna->mna_up.na_md_type =
	    pna->na_md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&mna->mna_up.na_md_subtype =
	    pna->na_md_subtype;

	/* a do-nothing txsync: monitors cannot be used to inject packets */
	mna->mna_up.na_txsync = nx_mon_na_txsync;
	mna->mna_up.na_rxsync = nx_mon_na_rxsync;
	mna->mna_up.na_krings_create = nx_mon_na_krings_create;
	mna->mna_up.na_krings_delete = nx_mon_na_krings_delete;

	/*
	 * We set the number of our na_rx_rings to be
	 * max(na_num_tx_rings, na_num_rx_rings) in the parent
	 */
	na_set_nrings(&mna->mna_up, NR_TX, na_get_nrings(pna, NR_TX));
	na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_RX));
	if (na_get_nrings(pna, NR_TX) > na_get_nrings(pna, NR_RX)) {
		na_set_nrings(&mna->mna_up, NR_RX, na_get_nrings(pna, NR_TX));
	}
	na_set_nslots(&mna->mna_up, NR_TX, na_get_nslots(pna, NR_TX));
	na_set_nslots(&mna->mna_up, NR_RX, na_get_nslots(pna, NR_RX));

	na_attach_common(&mna->mna_up, nx, &nx_monitor_prov_s);

	/* remember the traffic directions we have to monitor */
	mna->mna_mode = (chr->cr_mode & CHMODE_MONITOR);

	/* keep the reference to the parent */
	*na = &mna->mna_up;
	na_retain_locked(*na);

	/* sanity check: monitor and monitored adapters must share the nexus */
	ASSERT((*na)->na_nx == pna->na_nx);

#if SK_LOG
	SK_DF(SK_VERB_MONITOR, "created monitor adapter 0x%llx", SK_KVA(mna));
	SK_DF(SK_VERB_MONITOR, "na_name: \"%s\"", mna->mna_up.na_name);
	SK_DF(SK_VERB_MONITOR, "  UUID:        %s",
	    sk_uuid_unparse(mna->mna_up.na_uuid, uuidstr));
	SK_DF(SK_VERB_MONITOR, "  nx:          0x%llx (\"%s\":\"%s\")",
	    SK_KVA(mna->mna_up.na_nx), NX_DOM(mna->mna_up.na_nx)->nxdom_name,
	    NX_DOM_PROV(mna->mna_up.na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_MONITOR, "  flags:       0x%b",
	    mna->mna_up.na_flags, NAF_BITS);
	SK_DF(SK_VERB_MONITOR, "  rings:       tx %u rx %u",
	    na_get_nrings(&mna->mna_up, NR_TX),
	    na_get_nrings(&mna->mna_up, NR_RX));
	SK_DF(SK_VERB_MONITOR, "  slots:       tx %u rx %u",
	    na_get_nslots(&mna->mna_up, NR_TX),
	    na_get_nslots(&mna->mna_up, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_DF(SK_VERB_MONITOR, "  next_pipe:   %u", mna->mna_up.na_next_pipe);
	SK_DF(SK_VERB_MONITOR, "  max_pipes:   %u", mna->mna_up.na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
	SK_DF(SK_VERB_MONITOR, "  mna_tx_rings: [%u,%u)", mna->mna_first[NR_TX],
	    mna->mna_last[NR_TX]);
	SK_DF(SK_VERB_MONITOR, "  mna_rx_rings: [%u,%u)", mna->mna_first[NR_RX],
	    mna->mna_last[NR_RX]);
	SK_DF(SK_VERB_MONITOR, "  mna_mode:    %u", mna->mna_mode);
#endif /* SK_LOG */

	return 0;

put_out:
	/* error path: drop the parent reference and free the monitor */
	if (pna != NULL) {
		(void) na_release_locked(pna);
		pna = NULL;
	}
	NA_FREE(&mna->mna_up);
	return error;
}
1682
1683 static void
nx_mon_quantum_copy_64x(const enum txrx t,kern_packet_t dph,const uint16_t doff,kern_packet_t sph,const uint16_t soff,const uint32_t len,const boolean_t unused_arg1,const uint16_t unused_arg2,const uint16_t unused_arg3,const boolean_t unused_arg4)1684 nx_mon_quantum_copy_64x(const enum txrx t, kern_packet_t dph,
1685 const uint16_t doff, kern_packet_t sph, const uint16_t soff,
1686 const uint32_t len, const boolean_t unused_arg1,
1687 const uint16_t unused_arg2, const uint16_t unused_arg3,
1688 const boolean_t unused_arg4)
1689 {
1690 /* for function prototype parity with pkt_copy_from_pkt_t */
1691 #pragma unused(unused_arg1, unused_arg2, unused_arg3, unused_arg4)
1692 #pragma unused(t, doff, soff)
1693 struct __kern_quantum *dqum = SK_PTR_ADDR_KQUM(dph);
1694 struct __kern_quantum *squm = SK_PTR_ADDR_KQUM(sph);
1695 uint8_t *sbuf, *dbuf;
1696
1697 ASSERT(METADATA_TYPE(squm) == NEXUS_META_TYPE_QUANTUM);
1698 ASSERT(METADATA_TYPE(squm) == METADATA_TYPE(dqum));
1699 VERIFY(IS_P2ALIGNED(len, 64));
1700
1701 MD_BUFLET_ADDR(squm, sbuf);
1702 MD_BUFLET_ADDR(dqum, dbuf);
1703 VERIFY(IS_P2ALIGNED(dbuf, sizeof(uint64_t)));
1704
1705 if (__probable(IS_P2ALIGNED(sbuf, sizeof(uint64_t)))) {
1706 sk_copy64_64x((uint64_t *)(void *)sbuf,
1707 (uint64_t *)(void *)dbuf, len);
1708 } else {
1709 bcopy(sbuf, dbuf, len);
1710 }
1711 /*
1712 * This copy routine only copies to/from a buflet, so the length
1713 * is guaranteed be <= the size of a buflet.
1714 */
1715 VERIFY(len <= UINT16_MAX);
1716 METADATA_SET_LEN(dqum, (uint16_t)len, 0);
1717 }
1718