xref: /xnu-11417.121.6/bsd/skywalk/core/skywalk.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
31 #include <sys/codesign.h>       /* for csproc_get_platform_binary */
32 #include <sys/reason.h>
33 #if CONFIG_MACF
34 #include <security/mac_framework.h>
35 #endif /* CONFIG_MACF */
36 
/* fallback host-to-little-endian 16-bit conversion if not already defined */
#ifndef htole16
#if BYTE_ORDER == LITTLE_ENDIAN
#define htole16(x)      ((uint16_t)(x))
#else /* BYTE_ORDER != LITTLE_ENDIAN */
#define htole16(x)      bswap16((x))
#endif /* BYTE_ORDER == LITTLE_ENDIAN */
#endif /* htole16 */

/* global Skywalk lock group/attributes and the big sk_lock mutex */
LCK_GRP_DECLARE(sk_lock_group, "sk_lock");
LCK_ATTR_DECLARE(sk_lock_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(sk_lock, &sk_lock_group, &sk_lock_attr);

static void skywalk_fini(void);
static int sk_priv_chk(proc_t, kauth_cred_t, int);

/* non-zero once skywalk_init() has completed successfully */
static int __sk_inited = 0;
#if (DEVELOPMENT || DEBUG)
size_t sk_copy_thres = SK_COPY_THRES;   /* copy threshold, tunable via sysctl */
uint64_t sk_verbose;                    /* verbose logging flags */
#endif /* DEVELOPMENT || DEBUG */
uint32_t sk_debug;
/*
 * Bitmap of compiled-in Skywalk features; exported read-only through
 * the kern.skywalk.features sysctl declared below.
 */
uint64_t sk_features =
#if SKYWALK
    SK_FEATURE_SKYWALK |
#endif
#if DEVELOPMENT
    SK_FEATURE_DEVELOPMENT |
#endif
#if DEBUG
    SK_FEATURE_DEBUG |
#endif
#if CONFIG_NEXUS_FLOWSWITCH
    SK_FEATURE_NEXUS_FLOWSWITCH |
#endif
#if CONFIG_NEXUS_MONITOR
    SK_FEATURE_NEXUS_MONITOR |
#endif
#if CONFIG_NEXUS_NETIF
    SK_FEATURE_NEXUS_NETIF |
#endif
#if CONFIG_NEXUS_USER_PIPE
    SK_FEATURE_NEXUS_USER_PIPE |
#endif
#if CONFIG_NEXUS_KERNEL_PIPE
    SK_FEATURE_NEXUS_KERNEL_PIPE |
#endif
#if CONFIG_NEXUS_KERNEL_PIPE && (DEVELOPMENT || DEBUG)
    SK_FEATURE_NEXUS_KERNEL_PIPE_LOOPBACK |
#endif
#if (DEVELOPMENT || DEBUG)
    SK_FEATURE_DEV_OR_DEBUG |
#endif
    0;

uint32_t sk_opp_defunct = 0;    /* opportunistic defunct */

/* checksum offload is generic to any nexus (not specific to flowswitch) */
uint32_t sk_cksum_tx = 1;       /* advertise outbound offload */
uint32_t sk_cksum_rx = 1;       /* perform inbound checksum offload */

/* guard pages */
uint32_t sk_guard = 0;          /* guard pages (0: disable) */
#define SK_GUARD_MIN    1       /* minimum # of guard pages */
#define SK_GUARD_MAX    4       /* maximum # of guard pages */
uint32_t sk_headguard_sz = SK_GUARD_MIN; /* # of leading guard pages */
uint32_t sk_tailguard_sz = SK_GUARD_MIN; /* # of trailing guard pages */
103 
#if (DEVELOPMENT || DEBUG)
/* ring-size overrides parsed from boot-args in skywalk_init(); 0 = default */
uint32_t sk_txring_sz = 0;      /* flowswitch */
uint32_t sk_rxring_sz = 0;      /* flowswitch */
uint32_t sk_net_txring_sz = 0;  /* netif adapter */
uint32_t sk_net_rxring_sz = 0;  /* netif adapter */
uint32_t sk_min_pool_size = 0;  /* minimum packet pool size */
#endif /* DEVELOPMENT || DEBUG */
111 
/* flow-advisory table sizing; clamped in skywalk_init() */
uint32_t sk_max_flows = NX_FLOWADV_DEFAULT;
uint32_t sk_fadv_nchunks;       /* # of FO_FLOWADV_CHUNK in bitmap */
uint32_t sk_netif_compat_txmodel = NETIF_COMPAT_TXMODEL_DEFAULT;
uint32_t sk_netif_native_txmodel = NETIF_NATIVE_TXMODEL_DEFAULT;
/*
 * Configures the RX aggregation logic for TCP in flowswitch.
 * A non-zero value enables the aggregation logic, with the maximum
 * aggregation length (in bytes) limited to this value.
 *
 * DO NOT increase beyond 16KB. If you do, we end up corrupting the data-stream
 * as we create aggregate-mbufs with a pktlen > 16KB but only a single element.
 */
uint32_t sk_fsw_rx_agg_tcp = 16384;

/*
 * Forces the RX host path to use or not use aggregation, regardless of the
 * existence of filters (see sk_fsw_rx_agg_tcp_host_t for valid values).
 */
uint32_t sk_fsw_rx_agg_tcp_host = SK_FSW_RX_AGG_TCP_HOST_AUTO;

/*
 * Configures the skywalk infrastructure for handling TCP TX aggregation.
 * A non-zero value enables the support.
 */
uint32_t sk_fsw_tx_agg_tcp = 1;
/*
 * Configuration to limit the number of buffers for flowswitch VP channel.
 */
uint32_t sk_fsw_max_bufs = 0;
/*
 * GSO MTU for the channel path:
 *   > 0: enable GSO and use value as the largest supported segment size
 *  == 0: disable GSO
 */
uint32_t sk_fsw_gso_mtu = 16 * 1024;

/* list of interfaces that allow direct open from userspace */
#define SK_NETIF_DIRECT_MAX     8
char sk_netif_direct[SK_NETIF_DIRECT_MAX][IFXNAMSIZ];
uint32_t sk_netif_direct_cnt = 0;

/* TX mitigation thresholds for the netif compat path */
uint16_t sk_tx_delay_qlen = 16;                 /* packets */
uint16_t sk_tx_delay_timeout = (1 * 1000);        /* microseconds */

/* per-media-type default ring sizes for netif compat adapters */
#define SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ     64
#define SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ     64
uint32_t sk_netif_compat_aux_cell_tx_ring_sz =
    SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ;
uint32_t sk_netif_compat_aux_cell_rx_ring_sz =
    SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ;

/* Wi-Fi Access Point */
#define SK_NETIF_COMPAT_WAP_TX_RING_SZ  128
#define SK_NETIF_COMPAT_WAP_RX_RING_SZ  128
uint32_t sk_netif_compat_wap_tx_ring_sz = SK_NETIF_COMPAT_WAP_TX_RING_SZ;
uint32_t sk_netif_compat_wap_rx_ring_sz = SK_NETIF_COMPAT_WAP_RX_RING_SZ;

/* AWDL */
#define SK_NETIF_COMPAT_AWDL_TX_RING_SZ 128
#define SK_NETIF_COMPAT_AWDL_RX_RING_SZ 128
uint32_t sk_netif_compat_awdl_tx_ring_sz = SK_NETIF_COMPAT_AWDL_TX_RING_SZ;
uint32_t sk_netif_compat_awdl_rx_ring_sz = SK_NETIF_COMPAT_AWDL_RX_RING_SZ;

/* Wi-Fi Infrastructure */
#define SK_NETIF_COMPAT_WIF_TX_RING_SZ  128
#define SK_NETIF_COMPAT_WIF_RX_RING_SZ  128
uint32_t sk_netif_compat_wif_tx_ring_sz = SK_NETIF_COMPAT_WIF_TX_RING_SZ;
uint32_t sk_netif_compat_wif_rx_ring_sz = SK_NETIF_COMPAT_WIF_RX_RING_SZ;

/* USB Ethernet */
#define SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ      128
#define SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ      128
uint32_t sk_netif_compat_usb_eth_tx_ring_sz =
    SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ;
uint32_t sk_netif_compat_usb_eth_rx_ring_sz =
    SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ;

/* cap on the netif compat RX mbuf queue */
#define SK_NETIF_COMPAT_RX_MBQ_LIMIT    8192
int sk_netif_compat_rx_mbq_limit = SK_NETIF_COMPAT_RX_MBQ_LIMIT;

uint32_t sk_netif_tx_mit = SK_NETIF_MIT_AUTO;
uint32_t sk_netif_rx_mit = SK_NETIF_MIT_AUTO;
char sk_ll_prefix[IFNAMSIZ] = "llw";    /* low-latency interface name prefix */
uint32_t sk_rx_sync_packets = 1;
uint32_t sk_channel_buflet_alloc = 0;
uint32_t sk_netif_queue_stat_enable = 0;
197 
/* sysctl nodes: kern.skywalk and kern.skywalk.stats */
SYSCTL_NODE(_kern, OID_AUTO, skywalk, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk parameters");
SYSCTL_NODE(_kern_skywalk, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk statistics");

/* read-only export of the sk_features bitmap defined above */
SYSCTL_OPAQUE(_kern_skywalk, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED,
    &sk_features, sizeof(sk_features), "-", "Skywalk features");

#if (DEVELOPMENT || DEBUG)
/* writable knobs exposed only on DEVELOPMENT/DEBUG kernels */
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_verbose, "Skywalk verbose mode");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_debug, 0, "Skywalk debug mode");
SYSCTL_LONG(_kern_skywalk, OID_AUTO, sk_copy_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_copy_thres, "Skywalk copy threshold");
/* when zero, sk_priv_chk() ignores privilege-check failures (debug aid) */
static int __priv_check = 1;
SYSCTL_INT(_kern_skywalk, OID_AUTO, priv_check, CTLFLAG_RW | CTLFLAG_LOCKED,
    &__priv_check, 0, "Skywalk privilege check");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_opp_defunct, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_opp_defunct, 0, "Defunct opportunistically");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_tx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_tx, 0, "Advertise (and perform) outbound checksum offload");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_rx, 0, "Perform inbound checksum offload");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_rx_sync_packets, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_rx_sync_packets, 0, "Enable RX sync packets");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, chan_buf_alloc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sk_channel_buflet_alloc, 0,
    "channel buflet allocation (enable/disable)");
#endif /* DEVELOPMENT || DEBUG */
228 
#if (DEVELOPMENT || DEBUG)
/* random mask applied when injecting artificial errors (test facility) */
uint32_t sk_inject_error_rmask = 0x3;
SYSCTL_UINT(_kern_skywalk, OID_AUTO, inject_error_rmask,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sk_inject_error_rmask, 0x3, "");
#endif /* DEVELOPMENT || DEBUG */
234 
#if (DEVELOPMENT || DEBUG)
static void skywalk_self_tests(void);
#endif /* (DEVELOPMENT || DEBUG) */

/* skmem allocation tags (see __sk_tag_make() below) */
#define SKMEM_TAG_SYSCTL_BUF "com.apple.skywalk.sysctl_buf"
SKMEM_TAG_DEFINE(skmem_tag_sysctl_buf, SKMEM_TAG_SYSCTL_BUF);

#define SKMEM_TAG_OID       "com.apple.skywalk.skoid"
SKMEM_TAG_DEFINE(skmem_tag_oid, SKMEM_TAG_OID);

#if (SK_LOG || DEVELOPMENT || DEBUG)
#define SKMEM_TAG_DUMP  "com.apple.skywalk.dump"
static SKMEM_TAG_DEFINE(skmem_tag_dump, SKMEM_TAG_DUMP);

/* scratch buffer for sk_dump(); allocated in skywalk_init(), freed in fini */
static uint32_t sk_dump_buf_size;
static char *__sized_by(sk_dump_buf_size) sk_dump_buf;
#define SK_DUMP_BUF_SIZE        2048
#define SK_DUMP_BUF_ALIGN       16
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */
254 
/*
 * Startup-time constructor for an skmem allocation tag: allocates the
 * kernel allocation name for the given tag spec and stores it in the
 * spec's variable (invoked via SKMEM_TAG_DEFINE()).
 */
__startup_func
void
__sk_tag_make(const struct sk_tag_spec *spec)
{
	*spec->skt_var = kern_allocation_name_allocate(spec->skt_name, 0);
}
261 
262 boolean_t
skywalk_netif_direct_enabled(void)263 skywalk_netif_direct_enabled(void)
264 {
265 	return sk_netif_direct_cnt > 0;
266 }
267 
268 boolean_t
skywalk_netif_direct_allowed(const char * ifname)269 skywalk_netif_direct_allowed(const char *ifname)
270 {
271 	uint32_t i;
272 
273 	for (i = 0; i < sk_netif_direct_cnt; i++) {
274 		if (strlcmp(sk_netif_direct[i], ifname, IFXNAMSIZ) == 0) {
275 			return TRUE;
276 		}
277 	}
278 	return FALSE;
279 }
280 
281 #if (DEVELOPMENT || DEBUG)
/*
 * Parse the "sk_netif_direct" boot-arg: a comma-separated list of up
 * to SK_NETIF_DIRECT_MAX interface names that userspace is allowed to
 * open directly.  Fills sk_netif_direct[] and sk_netif_direct_cnt.
 */
static void
parse_netif_direct(void)
{
	char buf[(IFXNAMSIZ + 1) * SK_NETIF_DIRECT_MAX];
	size_t i, curr, len, iflen;

	if (!PE_parse_boot_arg_str("sk_netif_direct", buf, sizeof(buf))) {
		return;
	}

	curr = 0;
	len = strbuflen(buf);
	/* scan up to and including the terminating NUL */
	for (i = 0; i < len + 1 &&
	    sk_netif_direct_cnt < SK_NETIF_DIRECT_MAX; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			continue;
		}

		/* terminate the current token in place */
		buf[i] = '\0';
		iflen = i - curr;
		/* skip empty tokens and names that would not fit */
		if (iflen > 0 && iflen < IFXNAMSIZ) {
			(void) strbufcpy(sk_netif_direct[sk_netif_direct_cnt],
			    IFXNAMSIZ, buf + curr, IFXNAMSIZ);
			sk_netif_direct_cnt++;
		}
		curr = i + 1;
	}
}
310 #endif /* DEVELOPMENT || DEBUG */
311 
/*
 * Tear down Skywalk subsystems in the reverse order of skywalk_init().
 * Caller must hold sk_lock; no-op unless initialization previously
 * completed (__sk_inited).
 */
static void
skywalk_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__sk_inited) {
#if (DEVELOPMENT || DEBUG)
		skmem_test_fini();
		cht_test_fini();
#endif /* (DEVELOPMENT || DEBUG) */
		channel_fini();
		nexus_fini();
		skmem_fini();
		flowidns_fini();

#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* release the sk_dump() scratch buffer, if allocated */
		if (sk_dump_buf != NULL) {
			sk_free_data_sized_by(sk_dump_buf, sk_dump_buf_size);
			sk_dump_buf = NULL;
			sk_dump_buf_size = 0;
		}
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		__sk_inited = 0;
	}
}
338 
/*
 * One-time Skywalk bring-up.  On DEVELOPMENT/DEBUG kernels, first parse
 * the tunable boot-args; then clamp tunables to valid ranges, size the
 * guard pages, and initialize the skmem/nexus/channel subsystems under
 * sk_lock (rolling back via skywalk_fini() on failure).  On success,
 * also initializes the netns/protons/flowidns namespaces, allocates the
 * sk_dump() buffer, and (DEVELOPMENT/DEBUG) runs self tests.
 * Returns 0 on success or the error from nexus/channel init.
 */
int
skywalk_init(void)
{
	int error;

	VERIFY(!__sk_inited);

	_CASSERT(sizeof(kern_packet_t) == sizeof(uint64_t));
	_CASSERT(sizeof(bitmap_t) == sizeof(uint64_t));

#if (DEVELOPMENT || DEBUG)
	PE_parse_boot_argn("sk_debug", &sk_debug, sizeof(sk_debug));
	PE_parse_boot_argn("sk_verbose", &sk_verbose, sizeof(sk_verbose));
	(void) PE_parse_boot_argn("sk_opp_defunct", &sk_opp_defunct,
	    sizeof(sk_opp_defunct));
	(void) PE_parse_boot_argn("sk_cksum_tx", &sk_cksum_tx,
	    sizeof(sk_cksum_tx));
	(void) PE_parse_boot_argn("sk_cksum_rx", &sk_cksum_rx,
	    sizeof(sk_cksum_rx));
	(void) PE_parse_boot_argn("sk_txring_sz", &sk_txring_sz,
	    sizeof(sk_txring_sz));
	(void) PE_parse_boot_argn("sk_rxring_sz", &sk_rxring_sz,
	    sizeof(sk_rxring_sz));
	(void) PE_parse_boot_argn("sk_net_txring_sz", &sk_net_txring_sz,
	    sizeof(sk_net_txring_sz));
	(void) PE_parse_boot_argn("sk_net_rxring_sz", &sk_net_rxring_sz,
	    sizeof(sk_net_rxring_sz));
	(void) PE_parse_boot_argn("sk_max_flows", &sk_max_flows,
	    sizeof(sk_max_flows));
	(void) PE_parse_boot_argn("sk_native_txmodel", &sk_netif_native_txmodel,
	    sizeof(sk_netif_native_txmodel));
	(void) PE_parse_boot_argn("sk_compat_txmodel", &sk_netif_compat_txmodel,
	    sizeof(sk_netif_compat_txmodel));
	(void) PE_parse_boot_argn("sk_tx_delay_qlen", &sk_tx_delay_qlen,
	    sizeof(sk_tx_delay_qlen));
	/*
	 * NOTE(review): boot-arg name "sk_ts_delay_timeout" does not match
	 * the variable sk_tx_delay_timeout ("ts" vs "tx"); looks like a
	 * typo, but renaming would break existing boot-args -- confirm.
	 */
	(void) PE_parse_boot_argn("sk_ts_delay_timeout", &sk_tx_delay_timeout,
	    sizeof(sk_tx_delay_timeout));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_tx_ring_sz",
	    &sk_netif_compat_aux_cell_tx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_rx_ring_sz",
	    &sk_netif_compat_aux_cell_rx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_tx_ring_sz",
	    &sk_netif_compat_wap_tx_ring_sz,
	    sizeof(sk_netif_compat_wap_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_rx_ring_sz",
	    &sk_netif_compat_wap_rx_ring_sz,
	    sizeof(sk_netif_compat_wap_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_tx_ring_sz",
	    &sk_netif_compat_awdl_tx_ring_sz,
	    sizeof(sk_netif_compat_awdl_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_rx_ring_sz",
	    &sk_netif_compat_awdl_rx_ring_sz,
	    sizeof(sk_netif_compat_awdl_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_tx_ring_sz",
	    &sk_netif_compat_wif_tx_ring_sz,
	    sizeof(sk_netif_compat_wif_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_rx_ring_sz",
	    &sk_netif_compat_wif_rx_ring_sz,
	    sizeof(sk_netif_compat_wif_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_tx_ring_sz",
	    &sk_netif_compat_usb_eth_tx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_rx_ring_sz",
	    &sk_netif_compat_usb_eth_rx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_rx_mbq_limit",
	    &sk_netif_compat_rx_mbq_limit, sizeof(sk_netif_compat_rx_mbq_limit));
	(void) PE_parse_boot_argn("sk_netif_tx_mit",
	    &sk_netif_tx_mit, sizeof(sk_netif_tx_mit));
	(void) PE_parse_boot_argn("sk_netif_rx_mit",
	    &sk_netif_rx_mit, sizeof(sk_netif_rx_mit));
	(void) PE_parse_boot_arg_str("sk_ll_prefix", sk_ll_prefix,
	    sizeof(sk_ll_prefix));
	(void) PE_parse_boot_argn("sk_netif_q_stats", &sk_netif_queue_stat_enable,
	    sizeof(sk_netif_queue_stat_enable));
	parse_netif_direct();
	(void) PE_parse_boot_argn("sk_fsw_rx_agg_tcp", &sk_fsw_rx_agg_tcp,
	    sizeof(sk_fsw_rx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_tx_agg_tcp", &sk_fsw_tx_agg_tcp,
	    sizeof(sk_fsw_tx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_gso_mtu", &sk_fsw_gso_mtu,
	    sizeof(sk_fsw_gso_mtu));
	(void) PE_parse_boot_argn("sk_fsw_max_bufs", &sk_fsw_max_bufs,
	    sizeof(sk_fsw_max_bufs));
	(void) PE_parse_boot_argn("sk_rx_sync_packets", &sk_rx_sync_packets,
	    sizeof(sk_rx_sync_packets));
	(void) PE_parse_boot_argn("sk_chan_buf_alloc", &sk_channel_buflet_alloc,
	    sizeof(sk_channel_buflet_alloc));
	(void) PE_parse_boot_argn("sk_guard", &sk_guard, sizeof(sk_guard));
	(void) PE_parse_boot_argn("sk_headguard_sz", &sk_headguard_sz,
	    sizeof(sk_headguard_sz));
	(void) PE_parse_boot_argn("sk_tailguard_sz", &sk_tailguard_sz,
	    sizeof(sk_tailguard_sz));
	(void) PE_parse_boot_argn("sk_min_pool_size", &sk_min_pool_size,
	    sizeof(sk_min_pool_size));
#endif /* DEVELOPMENT || DEBUG */

	/* clamp flow-advisory table size to [1, NX_FLOWADV_MAX] */
	if (sk_max_flows == 0) {
		sk_max_flows = NX_FLOWADV_DEFAULT;
	} else if (sk_max_flows > NX_FLOWADV_MAX) {
		sk_max_flows = NX_FLOWADV_MAX;
	}

	if (sk_netif_tx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_tx_mit = SK_NETIF_MIT_MAX;
	}
	if (sk_netif_rx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_rx_mit = SK_NETIF_MIT_MAX;
	}

	/* number of flow-advisory bitmap chunks needed for sk_max_flows */
	sk_fadv_nchunks = (uint32_t)P2ROUNDUP(sk_max_flows, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	/*
	 * Size the guard pages: a zero tunable means pick a random count
	 * (0..SK_GUARD_MAX); the result is clamped to
	 * [SK_GUARD_MIN, SK_GUARD_MAX] either way.
	 */
	if (sk_guard) {
		uint32_t sz;
		/* leading guard page(s) */
		if (sk_headguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_headguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_headguard_sz > SK_GUARD_MAX) {
			sk_headguard_sz = SK_GUARD_MAX;
		}
		if (sk_headguard_sz < SK_GUARD_MIN) {
			sk_headguard_sz = SK_GUARD_MIN;
		}
		/* trailing guard page(s) */
		if (sk_tailguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_tailguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_tailguard_sz > SK_GUARD_MAX) {
			sk_tailguard_sz = SK_GUARD_MAX;
		}
		if (sk_tailguard_sz < SK_GUARD_MIN) {
			sk_tailguard_sz = SK_GUARD_MIN;
		}
	} else {
		sk_headguard_sz = sk_tailguard_sz = SK_GUARD_MIN;
	}
	ASSERT(sk_headguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_headguard_sz <= SK_GUARD_MAX);
	ASSERT(sk_tailguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_tailguard_sz <= SK_GUARD_MAX);

	__sk_inited = 1;

	SK_LOCK();
	skmem_init();
	error = nexus_init();
	if (error == 0) {
		error = channel_init();
	}
	if (error != 0) {
		/* roll back whatever was brought up */
		skywalk_fini();
	}
	SK_UNLOCK();

	if (error == 0) {
#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* allocate space for sk_dump_buf */
		sk_dump_buf = sk_alloc_data(SK_DUMP_BUF_SIZE, Z_WAITOK | Z_NOFAIL,
		    skmem_tag_dump);
		sk_dump_buf_size = SK_DUMP_BUF_SIZE;
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		netns_init();
		protons_init();
		flowidns_init();

#if (DEVELOPMENT || DEBUG)
		skywalk_self_tests();
		skmem_test_init();
		cht_test_init();
#endif /* DEVELOPMENT || DEBUG */
	}

	return error;
}
518 
519 /*
520  * csproc_get_platform_binary() returns non-zero if the process is platform
521  * code, which means that it is considered part of the Operating System.
522  * On iOS, that means it's contained in the trust cache or a loaded one.
523  * On macOS, everything signed by B&I is currently platform code, but the
524  * policy in general is subject to change.  Thus this is an approximate.
525  */
526 boolean_t
skywalk_check_platform_binary(proc_t p)527 skywalk_check_platform_binary(proc_t p)
528 {
529 	return (csproc_get_platform_binary(p) == 0) ? FALSE : TRUE;
530 }
531 
/*
 * Check that the credential holds the given Skywalk privilege.
 * Returns 0 on success, EPERM (or the priv_check_cred() error)
 * otherwise.  On DEVELOPMENT/DEBUG kernels, failures are logged with
 * the matching entitlement string, and may be ignored entirely when
 * the kern.skywalk.priv_check sysctl (__priv_check) is zero.
 */
static int
sk_priv_chk(proc_t p, kauth_cred_t cred, int priv)
{
#pragma unused(p)
	int ret = EPERM;

	if (cred != NULL) {
		ret = priv_check_cred(cred, priv, 0);
	}
#if (DEVELOPMENT || DEBUG)
	if (ret != 0) {
		/* map the privilege to its entitlement string for logging */
		const char *pstr;

		switch (priv) {
		case PRIV_SKYWALK_REGISTER_USER_PIPE:
			pstr = "com.apple.private.skywalk.register-user-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_KERNEL_PIPE:
			pstr = "com.apple.private.skywalk.register-kernel-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_NET_IF:
			pstr = "com.apple.private.skywalk.register-net-if";
			break;

		case PRIV_SKYWALK_REGISTER_FLOW_SWITCH:
			pstr = "com.apple.private.skywalk.register-flow-switch";
			break;

		case PRIV_SKYWALK_OBSERVE_ALL:
			pstr = "com.apple.private.skywalk.observe-all";
			break;

		case PRIV_SKYWALK_OBSERVE_STATS:
			pstr = "com.apple.private.skywalk.observe-stats";
			break;

		case PRIV_SKYWALK_LOW_LATENCY_CHANNEL:
			pstr = "com.apple.private.skywalk.low-latency-channel";
			break;

		default:
			pstr = "unknown";
			break;
		}

#if SK_LOG
		if (__priv_check) {
			SK_DF(SK_VERB_PRIV, "%s(%d) insufficient privilege %d "
			    "(\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		} else {
			SK_DF(SK_VERB_PRIV, "%s(%d) IGNORING missing privilege "
			    "%d (\"%s\") err %d", sk_proc_name_address(p),
			    sk_proc_pid(p), priv, pstr, ret);
		}
#endif /* SK_LOG */

		/* ignore privilege check failures if requested */
		if (!__priv_check) {
			ret = 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

	return ret;
}
600 
/*
 * Public wrapper around sk_priv_chk(): returns 0 when the credential
 * holds the given Skywalk privilege, an errno otherwise.
 */
int
skywalk_priv_check_cred(proc_t p, kauth_cred_t cred, int priv)
{
	return sk_priv_chk(p, cred, priv);
}
606 
607 #if CONFIG_MACF
608 int
skywalk_mac_system_check_proc_cred(proc_t p,const char * info_type)609 skywalk_mac_system_check_proc_cred(proc_t p, const char *info_type)
610 {
611 	int ret;
612 	kauth_cred_t cred = kauth_cred_proc_ref(p);
613 	ret = mac_system_check_info(cred, info_type);
614 	kauth_cred_unref(&cred);
615 
616 	return ret;
617 }
618 #endif /* CONFIG_MACF */
619 
620 /*
621  * Scan thru the list of privileges needed before we allow someone
622  * to open a handle to the Nexus controller.  This should be done
623  * at nxctl_create() time, and additional privilege check specific
624  * to the operation (e.g. register, etc.) should be done afterwards.
625  */
626 int
skywalk_nxctl_check_privileges(proc_t p,kauth_cred_t cred)627 skywalk_nxctl_check_privileges(proc_t p, kauth_cred_t cred)
628 {
629 	int ret = 0;
630 
631 	if (p == kernproc) {
632 		goto done;
633 	}
634 
635 	do {
636 		/*
637 		 * Check for observe-{stats,all} entitlements first
638 		 * before the rest, to account for nexus controller
639 		 * clients that don't need anything but statistics;
640 		 * it would help quiesce sandbox violation warnings.
641 		 */
642 		if ((ret = sk_priv_chk(p, cred,
643 		    PRIV_SKYWALK_OBSERVE_STATS)) == 0) {
644 			break;
645 		}
646 		if ((ret = sk_priv_chk(p, cred,
647 		    PRIV_SKYWALK_OBSERVE_ALL)) == 0) {
648 			break;
649 		}
650 		if ((ret = sk_priv_chk(p, cred,
651 		    PRIV_SKYWALK_REGISTER_USER_PIPE)) == 0) {
652 			break;
653 		}
654 		if ((ret = sk_priv_chk(p, cred,
655 		    PRIV_SKYWALK_REGISTER_KERNEL_PIPE)) == 0) {
656 			break;
657 		}
658 		if ((ret = sk_priv_chk(p, cred,
659 		    PRIV_SKYWALK_REGISTER_NET_IF)) == 0) {
660 			break;
661 		}
662 		if ((ret = sk_priv_chk(p, cred,
663 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) == 0) {
664 			break;
665 		}
666 		/* none set, so too bad */
667 		ret = EPERM;
668 	} while (0);
669 
670 #if (DEVELOPMENT || DEBUG)
671 	if (ret != 0) {
672 		SK_ERR("%s(%d) insufficient privilege to open nexus controller "
673 		    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ret);
674 	}
675 #endif /* !DEVELOPMENT && !DEBUG */
676 done:
677 	return ret;
678 }
679 
/*
 * Generate a 64-bit guard ID for a channel ('CH') or nexus ('NX')
 * file descriptor guard.  The 8-byte value is built from four 16-bit
 * words: [0] random, [1] first two bytes of the object's UUID,
 * [2] 'CH'/'NX' signature, [3] 'SK' signature (signatures stored
 * little-endian).  VERIFY ensures the result is never zero.  The value
 * is copied into *guard, truncated if guardid_t is smaller than 8 bytes.
 */
void
sk_gen_guard_id(boolean_t isch, const uuid_t uuid, guardid_t *guard)
{
#define GUARD_CH_SIG    0x4348  /* 'CH' */
#define GUARD_NX_SIG    0x4e58  /* 'NX' */
	union {
		uint8_t         _u8[8];
		uint16_t        _u16[4];
		uint64_t        _u64;
	} __u;

	read_random(&__u._u16[0], sizeof(uint16_t));
	bcopy(uuid, (void *)&__u._u16[1], sizeof(uint16_t));
	__u._u16[2] = htole16(isch ? GUARD_CH_SIG : GUARD_NX_SIG);
	__u._u16[3] = htole16(0x534b);  /* 'SK' */
	VERIFY(__u._u64 != 0);

	bzero(guard, sizeof(*guard));
	bcopy((void *)&__u._u64, guard, MIN(sizeof(*guard),
	    sizeof(uint64_t)));
}
701 
702 
/*
 * Format a UUID in upper-case text form into the caller-supplied
 * uuid_string_t and return a pointer to it (convenience for logging).
 */
extern char *
__counted_by(sizeof(uuid_string_t))
sk_uuid_unparse(const uuid_t uu, uuid_string_t out)
{
	uuid_unparse_upper(uu, out);
	return out;
}
710 
711 #if SK_LOG
712 /*
713  * packet-dump function, user-supplied or static buffer.
714  * The destination buffer must be at least 30+4*len
715  *
716  * @param p
717  *   buffer to be dumped.
718  * @param len
719  *   buffer's total length.
720  * @param dumplen
721  *   length to be dumped.
722  * @param dst
723  *   destination char buffer. sk_dump_buf would be used if dst is NULL.
724  * @param lim
725  *   destination char buffer max length. Not used if dst is NULL.
726  *
727  * -fbounds-safety: Note that all callers of this function pass NULL and 0 for
728  * dst and lim, respectively.
729  */
const char *
__counted_by(lim)
sk_dump(const char *label, const void *__sized_by(len) obj, int len, int dumplen,
    char *__counted_by(lim) dst, int lim)
{
	int i, j, i0, n = 0;
	static char hex[] = "0123456789abcdef";
	const char *p = obj;    /* dump cursor */
	uint32_t size;
	char *__sized_by(size) o;        /* output position */

#define P_HI(x) hex[((x) & 0xf0) >> 4]
#define P_LO(x) hex[((x) & 0xf)]
#define P_C(x)  ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (dst == NULL) {
		/* no caller buffer: fall back to the static sk_dump_buf */
		dst = sk_dump_buf;
		lim = SK_DUMP_BUF_SIZE;
	} else if (lim <= 0 || lim > len) {
		dst = dst;
		lim = len;  /* rdar://117789233 */
	}
	dumplen = MIN(len, dumplen);
	o = dst;
	size = lim;
	/* header line: label, kernel VA, lengths */
	n = scnprintf(o, lim, "%s 0x%llx len %d lim %d\n", label,
	    SK_KVA(p), len, lim);
	o += strbuflen(o, n);
	size -= n;
	/*
	 * hexdump routine: 16 bytes per row, hex columns at offset 0..47,
	 * ASCII rendering at offset 48.
	 * NOTE(review): the per-row writes are not re-checked against
	 * 'size'; the buffer must be at least 30+4*dumplen as stated in
	 * the header comment -- confirm callers honor this.
	 */
	for (i = 0; i < dumplen;) {
		n = scnprintf(o, size, "%5d: ", i);
		o += n;
		size -= n;
		memset(o, ' ', 48);
		i0 = i;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j * 3] = P_HI(p[i]);
			o[j * 3 + 1] = P_LO(p[i]);
		}
		i = i0;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j + 48] = P_C(p[i]);
		}
		o[j + 48] = '\n';
		o += j + 49;
		size -= (j + 49);
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
783 
784 /*
785  * "Safe" variant of proc_name_address(), meant to be used only for logging.
786  */
787 const char *
sk_proc_name_address(struct proc * p)788 sk_proc_name_address(struct proc *p)
789 {
790 	if (p == PROC_NULL) {
791 		return "proc_null";
792 	}
793 
794 	return proc_name_address(p);
795 }
796 
797 /*
798  * "Safe" variant of proc_pid(), mean to be used only for logging.
799  */
800 int
sk_proc_pid(struct proc * p)801 sk_proc_pid(struct proc *p)
802 {
803 	if (p == PROC_NULL) {
804 		return -1;
805 	}
806 
807 	return proc_pid(p);
808 }
809 
810 const char *
sk_sa_ntop(struct sockaddr * sa,char * __counted_by (addr_strlen)addr_str,size_t addr_strlen)811 sk_sa_ntop(struct sockaddr *sa, char *__counted_by(addr_strlen)addr_str,
812     size_t addr_strlen)
813 {
814 	const char *__null_terminated str = NULL;
815 
816 	addr_str[0] = '\0';
817 
818 	switch (sa->sa_family) {
819 	case AF_INET:
820 		str = inet_ntop(AF_INET, &SIN(sa)->sin_addr.s_addr,
821 		    addr_str, (socklen_t)addr_strlen);
822 		break;
823 
824 	case AF_INET6:
825 		str = inet_ntop(AF_INET6, &SIN6(sa)->sin6_addr,
826 		    addr_str, (socklen_t)addr_strlen);
827 		break;
828 
829 	default:
830 		str = __unsafe_null_terminated_from_indexable(addr_str);
831 		break;
832 	}
833 
834 	return str;
835 }
836 #endif /* SK_LOG */
837 
838 bool
sk_sa_has_addr(struct sockaddr * sa)839 sk_sa_has_addr(struct sockaddr *sa)
840 {
841 	switch (sa->sa_family) {
842 	case AF_INET:
843 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
844 		return SIN(sa)->sin_addr.s_addr != INADDR_ANY;
845 	case AF_INET6:
846 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
847 		return !IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr);
848 	default:
849 		return false;
850 	}
851 }
852 
853 bool
sk_sa_has_port(struct sockaddr * sa)854 sk_sa_has_port(struct sockaddr *sa)
855 {
856 	switch (sa->sa_family) {
857 	case AF_INET:
858 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
859 		return SIN(sa)->sin_port != 0;
860 	case AF_INET6:
861 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
862 		return SIN6(sa)->sin6_port != 0;
863 	default:
864 		return false;
865 	}
866 }
867 
868 /* returns port number in host byte order */
869 uint16_t
sk_sa_get_port(struct sockaddr * sa)870 sk_sa_get_port(struct sockaddr *sa)
871 {
872 	switch (sa->sa_family) {
873 	case AF_INET:
874 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
875 		return ntohs(SIN(sa)->sin_port);
876 	case AF_INET6:
877 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
878 		return ntohs(SIN6(sa)->sin6_port);
879 	default:
880 		VERIFY(0);
881 		/* NOTREACHED */
882 		__builtin_unreachable();
883 	}
884 }
885 
/*
 * Abort a misbehaving process: deliver SIGABRT with an
 * OS_REASON_SKYWALK exit reason so a crash report is generated.
 * Must never be called on the kernel process (VERIFY).  If the exit
 * reason cannot be allocated, the signal is still delivered with
 * OS_REASON_NULL.
 */
void
skywalk_kill_process(struct proc *p, uint64_t reason_code)
{
	os_reason_t exit_reason = OS_REASON_NULL;

	VERIFY(p != kernproc);

	exit_reason = os_reason_create(OS_REASON_SKYWALK, reason_code);
	if (exit_reason == OS_REASON_NULL) {
		SK_ERR("%s(%d) unable to allocate memory for crash reason "
		    "0x%llX", sk_proc_name_address(p), sk_proc_pid(p),
		    reason_code);
	} else {
		exit_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		SK_ERR("%s(%d) aborted for reason 0x%llX",
		    sk_proc_name_address(p), sk_proc_pid(p), reason_code);
	}

	/* NOTE(review): presumably consumes exit_reason -- confirm */
	psignal_try_thread_with_reason(p, current_thread(), SIGABRT,
	    exit_reason);
}
907 
908 #if (DEVELOPMENT || DEBUG)
#define SK_MEMCMP_LEN 256               /* length of each section */
#define SK_MASK_MAXLEN 80               /* maximum mask length */

/*
 * Self-test helper: compare the optimized sk_memcmp_mask_<l>B() and
 * its _scalar variant against the reference implementation for mask
 * table 't' of reference length 'lr'; panic on any disagreement.
 */
#define SK_MEMCMP_MASK_VERIFY(t, l, lr) do {                            \
	_CASSERT(sizeof(t##_m) == SK_MASK_MAXLEN);                      \
	if ((sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) ^           \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l " at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) ^  \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l "_scalar at i=%d\n", i);       \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Self-test helper: expect a match -- panic if either the optimized or
 * the scalar masked-compare reports a mismatch for mask table 't'.
 */
#define SK_MEMCMP_MASK_MATCH_VERIFY(t, l) do {                          \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " mismatch (expected match) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar mismatch (expected match) at i=%d s1=0x%x" \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
946 
947 #define SK_MEMCMP_MASK_MISMATCH_VERIFY(t, l) do {                       \
948 	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) == 0) {            \
949 	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
950 	            " match (expected mismatch) at i=%d s1=0x%x"        \
951 	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
952 	/* NOTREACHED */                                        \
953 	        __builtin_unreachable();                                \
954 	}                                                               \
955 	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) == 0) {   \
956 	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
957 	            "_scalar match (expected mismatch) at i=%d "        \
958 	            "s1=0x%x s2=0x%x\n", i, hdr1[i], hdr2[i]);          \
959 	/* NOTREACHED */                                        \
960 	        __builtin_unreachable();                                \
961 	}                                                               \
962 } while (0)
963 
964 #define SK_MEMCMP_BYTEMASK_VERIFY(t) do {                               \
965 	if ((sk_memcmp_mask(hdr1, hdr2, t##_m, i) != 0) ^               \
966 	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
967 	        panic_plain("\nbroken: " #t " using "                   \
968 	            "sk_memcmp_mask at i=%d\n", i);                     \
969 	/* NOTREACHED */                                        \
970 	        __builtin_unreachable();                                \
971 	}                                                               \
972 	if ((sk_memcmp_mask_scalar(hdr1, hdr2, t##_m, i) != 0) ^        \
973 	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
974 	        panic_plain("\nbroken: " #t " using "                   \
975 	            "sk_memcmp_mask_scalar at i=%d\n", i);              \
976 	/* NOTREACHED */                                        \
977 	        __builtin_unreachable();                                \
978 	}                                                               \
979 } while (0)
980 
981 static inline int
skywalk_memcmp_mask_ref(const uint8_t * __sized_by (n)src1,const uint8_t * __sized_by (n)src2,const uint8_t * __sized_by (n)byte_mask,size_t n)982 skywalk_memcmp_mask_ref(const uint8_t *__sized_by(n)src1,
983     const uint8_t *__sized_by(n)src2, const uint8_t *__sized_by(n)byte_mask,
984     size_t n)
985 {
986 	uint32_t result = 0;
987 	for (size_t i = 0; i < n; i++) {
988 		result |= (src1[i] ^ src2[i]) & byte_mask[i];
989 	}
990 	return result;
991 }
992 
/*
 * Boot-time self-tests for the masked-compare primitives.  Validates:
 * (1) the canonical flow-key mask templates (fk_mask_*tuple, ipflow*)
 *     have exactly the expected field coverage;
 * (2) the fixed-length sk_memcmp_mask_<N>B routines and their scalar
 *     variants agree with skywalk_memcmp_mask_ref for a sweep of
 *     single-byte differences under protocol-header-shaped masks;
 * (3) the variable-length sk_memcmp_mask agrees with the reference
 *     under a random byte mask;
 * (4) match/mismatch behavior when exactly one masked byte differs.
 * Any disagreement panics (via the SK_MEMCMP_* macros).
 */
static void
skywalk_memcmp_mask_self_tests(void)
{
	/*
	 * Each table below is SK_MASK_MAXLEN (80) bytes: a header-shaped
	 * compare mask padded with zeros.  0xff bytes participate in the
	 * compare, 0x00 bytes are ignored (presumably mutable fields such
	 * as IPv4 ID/checksum -- see the fixed-length routines' callers).
	 */
	static const uint8_t ipv4_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff,
		0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t udp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	/* fk_*_m tables mirror the struct flow_key layouts (48 bytes used) */
	static const uint8_t fk_all_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t2_m[] = {
		0x0a, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t3_m[] = {
		0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t4_m[] = {
		0x2f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t5_m[] = {
		0x3f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i1_m[] = {
		0x02, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i2_m[] = {
		0x07, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i3_m[] = {
		0x17, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};

	/* validate flow entry mask (2-tuple) */
	_CASSERT(FKMASK_2TUPLE == (FKMASK_PROTO | FKMASK_SPORT));
	VERIFY(fk_mask_2tuple.fk_mask == FKMASK_2TUPLE);
	VERIFY(fk_mask_2tuple.fk_ipver == 0);
	VERIFY(fk_mask_2tuple.fk_proto == 0xff);
	VERIFY(fk_mask_2tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_2tuple.fk_dport == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_pad[0] == 0);

	/* 3-tuple adds IP version and source address coverage */
	_CASSERT(FKMASK_3TUPLE == (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_3tuple.fk_mask == FKMASK_3TUPLE);
	VERIFY(fk_mask_3tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_3tuple.fk_proto == 0xff);
	VERIFY(fk_mask_3tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_3tuple.fk_dport == 0);
	VERIFY(fk_mask_3tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_3tuple.fk_pad[0] == 0);

	/* 4-tuple adds destination port coverage */
	_CASSERT(FKMASK_4TUPLE == (FKMASK_3TUPLE | FKMASK_DPORT));
	VERIFY(fk_mask_4tuple.fk_mask == FKMASK_4TUPLE);
	VERIFY(fk_mask_4tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_4tuple.fk_proto == 0xff);
	VERIFY(fk_mask_4tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_4tuple.fk_pad[0] == 0);

	/* 5-tuple adds destination address coverage */
	_CASSERT(FKMASK_5TUPLE == (FKMASK_4TUPLE | FKMASK_DST));
	VERIFY(fk_mask_5tuple.fk_mask == FKMASK_5TUPLE);
	VERIFY(fk_mask_5tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_5tuple.fk_proto == 0xff);
	VERIFY(fk_mask_5tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_pad[0] == 0);

	/* IP-flow masks: protocol only, then +ipver/src, then +dst */
	_CASSERT(FKMASK_IPFLOW1 == FKMASK_PROTO);
	VERIFY(fk_mask_ipflow1.fk_mask == FKMASK_IPFLOW1);
	VERIFY(fk_mask_ipflow1.fk_ipver == 0);
	VERIFY(fk_mask_ipflow1.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow1.fk_sport == 0);
	VERIFY(fk_mask_ipflow1.fk_dport == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_pad[0] == 0);

	_CASSERT(FKMASK_IPFLOW2 == (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_ipflow2.fk_mask == FKMASK_IPFLOW2);
	VERIFY(fk_mask_ipflow2.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow2.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow2.fk_sport == 0);
	VERIFY(fk_mask_ipflow2.fk_dport == 0);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow2.fk_pad[0] == 0);

	_CASSERT(FKMASK_IPFLOW3 == (FKMASK_IPFLOW2 | FKMASK_DST));
	VERIFY(fk_mask_ipflow3.fk_mask == FKMASK_IPFLOW3);
	VERIFY(fk_mask_ipflow3.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow3.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow3.fk_sport == 0);
	VERIFY(fk_mask_ipflow3.fk_dport == 0);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_pad[0] == 0);

	VERIFY(sk_dump_buf != NULL);

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is SK_MEMCMP_LEN-bytes long,
	 * which means we need at least 16+(3*SK_MEMCMP_LEN) bytes.
	 *
	 * 1st section is s1 -> (hdr1 aligned to 16-bytes)
	 * 2nd section is s2 -> (hdr2 = hdr1 + SK_MEMCMP_LEN)
	 * 3rd section is s3 -> (mask = hdr2 + SK_MEMCMP_LEN)
	 */
	void *s1, *s2, *s3;
	uintptr_t diff;

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	uint8_t *hdr1 = s1;
	uint8_t *hdr2 = s2;
	uint8_t *byte_m = s3;

	/* fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	kprintf("Skywalk: memcmp mask ... ");

	/*
	 * Phase 1: walk a single differing byte (hdr1[i]=1 vs hdr2[i]=0)
	 * across the first SK_MASK_MAXLEN bytes; each fixed-length routine
	 * must agree with the reference for every mask table.
	 */
	int i;
	for (i = 0; i < 80; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_MASK_VERIFY(ipv4, 32, 20);
		SK_MEMCMP_MASK_VERIFY(ipv6, 64, 40);
		SK_MEMCMP_MASK_VERIFY(ipv6_tcp, 80, 64);
		SK_MEMCMP_MASK_VERIFY(tcp, 32, 24);
		SK_MEMCMP_MASK_VERIFY(udp, 16, 6);
		SK_MEMCMP_MASK_VERIFY(fk_all, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t3, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t4, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t5, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i1, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i3, 48, 48);
		hdr2[i] = 1;
	}

	bzero(hdr1, SK_MEMCMP_LEN);
	bzero(hdr2, SK_MEMCMP_LEN);

	/* re-fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	/*
	 * Phase 2: same single-differing-byte sweep for the
	 * variable-length routine, under a random byte mask.
	 */
	for (i = 0; i < SK_MEMCMP_LEN; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_BYTEMASK_VERIFY(byte);
		hdr2[i] = 1;
	}

	/* fill hdr1 and hdr2 with random data */
	read_frandom(hdr1, SK_MEMCMP_LEN);
	bcopy(hdr1, hdr2, SK_MEMCMP_LEN);
	memset(byte_m, 0xff, SK_MEMCMP_LEN);

	/*
	 * Phase 3: with identical random buffers and an all-ones mask,
	 * force exactly one byte to differ at position i.  Routines whose
	 * compare length covers i must report mismatch; shorter ones must
	 * still match.  Clearing the mask byte at i must restore a match.
	 */
	for (i = 0; i < 80; i++) {
		uint8_t val = hdr2[i];
		uint8_t mval = byte_m[i];

		/* make hdr2[i] differ from hdr1[i] and be nonzero */
		while (hdr2[i] == hdr1[i] || hdr2[i] == 0) {
			uint8_t newval;
			read_frandom(&newval, sizeof(newval));
			hdr2[i] = newval;
		}
		if (i < 16) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 80);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		}
		/* masking off the differing byte must yield a match again */
		byte_m[i] = 0;
		if (i < 16) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 80);
		}
		/* restore original data and mask bytes for next iteration */
		hdr2[i] = val;
		byte_m[i] = mval;
	}

	kprintf("PASSED\n");
}
1360 
#define SK_COPY_LEN     128             /* length of each section */

/*
 * Set up a copy test: zero the reference (s2) and test (s3) targets and
 * snapshot the section pointers so SK_COPY_VERIFY can detect clobbering.
 * Expands in a scope providing s1/s2/s3 and _s1/_s2/_s3.
 */
#define SK_COPY_PREPARE(t) do {                                         \
	bzero(s2, SK_COPY_LEN);                                         \
	bzero(s3, SK_COPY_LEN);                                         \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/*
 * Verify a copy test: the section pointers must be unchanged (the
 * assembly copy routines must not clobber their input registers) and
 * the test target must be byte-identical to the bcopy reference.
 */
#define SK_COPY_VERIFY(t) do {                                          \
	if (_s1 != s1 || _s2 != s2 || _s3 != s3) {                      \
	        panic_plain("\ninput registers clobbered: " #t "\n");   \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (bcmp(s2, s3, SK_COPY_LEN) != 0) {                           \
	        panic_plain("\nbroken: " #t "\n");                      \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	} else {                                                        \
	        kprintf("PASSED\n");                                    \
	}                                                               \
} while (0)

/*
 * Set up a zero test: pre-fill both targets with the random source so
 * that only the zeroed prefix is expected to change.
 */
#define SK_ZERO_PREPARE(t) do {                                         \
	bcopy(s1, s2, SK_COPY_LEN);                                     \
	bcopy(s1, s3, SK_COPY_LEN);                                     \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/* zero tests use the same pass/fail criteria as copy tests */
#define SK_ZERO_VERIFY(t)       SK_COPY_VERIFY(t)
1393 
/*
 * Boot-time self-tests for Skywalk's optimized copy (sk_copy*) and zero
 * (sk_zero*) primitives: each is run against bcopy/bzero as the
 * reference and must produce identical bytes (panic otherwise, via
 * SK_COPY_VERIFY/SK_ZERO_VERIFY).  Also runs the memcmp-mask self-tests
 * and compile-time checks that the SK_KTRACE_* codes stay in sync with
 * the ariadne plist.
 */
static void
skywalk_self_tests(void)
{
	void *s1, *s2, *s3;
	void *_s1, *_s2, *_s3;      /* snapshots for clobber detection */
	uintptr_t diff;

	VERIFY(sk_dump_buf != NULL);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is 128-bytes long, which means we
	 * need at least 16+(3*128) bytes.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bcopy;
	 * 3rd section is test target base on our stuff.
	 */
	_CASSERT(SK_COPY_LEN != 0 && (SK_COPY_LEN % 128) == 0);
	_CASSERT((SK_COPY_LEN % 16) == 0);
	_CASSERT((SK_DUMP_BUF_ALIGN % 16) == 0);
	_CASSERT(SK_DUMP_BUF_SIZE >= (SK_DUMP_BUF_ALIGN + (SK_COPY_LEN * 3)));

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	/* fill s1 with random data */
	read_frandom(s1, SK_COPY_LEN);

	kprintf("Skywalk: running self-tests\n");

	/* Copy 8-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8);
	bcopy(s1, s2, 8);
	sk_copy64_8((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_8);

	/* Copy 8-bytes, 32-bit aligned (offset by 4 to break 64-bit alignment) */
	SK_COPY_PREPARE(sk_copy32_8);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 8);
	sk_copy32_8((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_8);

	/* Copy 16-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_16);
	bcopy(s1, s2, 16);
	sk_copy64_16((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_16);

	/* Copy 16-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_16);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 16);
	sk_copy32_16((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_16);

	/* Copy 20-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_20);
	bcopy(s1, s2, 20);
	sk_copy64_20((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_20);

	/* Copy 24-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_24);
	bcopy(s1, s2, 24);
	sk_copy64_24((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_24);

	/* Copy 32-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32);
	bcopy(s1, s2, 32);
	sk_copy64_32((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_32);

	/* Copy 32-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_32);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 32);
	sk_copy32_32((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_32);

	/* Copy 40-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_40);
	bcopy(s1, s2, 40);
	sk_copy64_40((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_40);

	/* Copy entire section in 64-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_64x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_64x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_64x);

	/* Copy entire section in 32-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_32x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_32x);

	/* Copy entire section in 8-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_8x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_8x);

	/* Copy entire section in 4-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_4x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_4x((uint32_t *)s1, (uint32_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_4x);

	/*
	 * Re-use sk_dump_buf for testing sk_zero, same principle as above.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bzero;
	 * 3rd section is test target base on our stuff.
	 */
	SK_ZERO_PREPARE(sk_zero_16);
	bzero(s2, 16);
	sk_zero_16(s3);
	SK_ZERO_VERIFY(sk_zero_16);

	SK_ZERO_PREPARE(sk_zero_32);
	bzero(s2, 32);
	sk_zero_32(s3);
	SK_ZERO_VERIFY(sk_zero_32);

	SK_ZERO_PREPARE(sk_zero_48);
	bzero(s2, 48);
	sk_zero_48(s3);
	SK_ZERO_VERIFY(sk_zero_48);

	SK_ZERO_PREPARE(sk_zero_128);
	bzero(s2, 128);
	sk_zero_128(s3);
	SK_ZERO_VERIFY(sk_zero_128);

	/* Perform memcmp with mask self tests */
	skywalk_memcmp_mask_self_tests();

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/* Keep packet trace code in sync with ariadne plist */
	_CASSERT(SK_KTRACE_AON_IF_STATS == 0x8100004);

	_CASSERT(SK_KTRACE_FSW_DEV_RING_FLUSH == 0x8110004);
	_CASSERT(SK_KTRACE_FSW_USER_RING_FLUSH == 0x8110008);
	_CASSERT(SK_KTRACE_FSW_FLOW_TRACK_RTT == 0x8110010);

	_CASSERT(SK_KTRACE_NETIF_RING_TX_REFILL == 0x8120004);
	_CASSERT(SK_KTRACE_NETIF_HOST_ENQUEUE == 0x8120008);
	_CASSERT(SK_KTRACE_NETIF_MIT_RX_INTR == 0x812000c);
	_CASSERT(SK_KTRACE_NETIF_COMMON_INTR == 0x8120010);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT == 0x8120014);
	_CASSERT(SK_KTRACE_NETIF_RX_NOTIFY_FAST == 0x8120018);

	_CASSERT(SK_KTRACE_CHANNEL_TX_REFILL == 0x8130004);

	_CASSERT(SK_KTRACE_PKT_RX_DRV == 0x8140004);
	_CASSERT(SK_KTRACE_PKT_RX_FSW == 0x8140008);
	_CASSERT(SK_KTRACE_PKT_RX_CHN == 0x814000c);
	_CASSERT(SK_KTRACE_PKT_TX_FSW == 0x8140040);
	_CASSERT(SK_KTRACE_PKT_TX_AQM == 0x8140044);
	_CASSERT(SK_KTRACE_PKT_TX_DRV == 0x8140048);
}
1574 #endif /* DEVELOPMENT || DEBUG */
1575