xref: /xnu-12377.81.4/bsd/skywalk/core/skywalk.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
31 #include <sys/codesign.h>       /* for csproc_get_platform_binary */
32 #include <sys/reason.h>
33 #include <netinet/inp_log.h>
34 #if CONFIG_MACF
35 #include <security/mac_framework.h>
36 #endif /* CONFIG_MACF */
37 
/*
 * Fallback host-to-little-endian 16-bit conversion for environments where
 * <sys/endian.h> style macros are not already provided.  A no-op on
 * little-endian hosts; byte-swap otherwise.  Used by sk_gen_guard_id().
 */
#ifndef htole16
#if BYTE_ORDER == LITTLE_ENDIAN
#define htole16(x)      ((uint16_t)(x))
#else /* BYTE_ORDER != LITTLE_ENDIAN */
#define htole16(x)      bswap16((x))
#endif /* BYTE_ORDER == LITTLE_ENDIAN */
#endif /* htole16 */
45 
/*
 * Global Skywalk mutex (sk_lock): serializes subsystem init/teardown and
 * other global state transitions (see SK_LOCK()/SK_UNLOCK() usage below).
 */
LCK_GRP_DECLARE(sk_lock_group, "sk_lock");
LCK_ATTR_DECLARE(sk_lock_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(sk_lock, &sk_lock_group, &sk_lock_attr);

static void skywalk_fini(void);
static int sk_priv_chk(proc_t, kauth_cred_t, int);

/* nonzero while the subsystem is up; set in skywalk_init(), cleared in skywalk_fini() */
static int __sk_inited = 0;
/* verbose-logging bitmap; kern.skywalk.verbose sysctl and (DEV/DEBUG) sk_verbose boot-arg */
uint64_t sk_verbose;
55 
#if (DEVELOPMENT || DEBUG)
/* copy-vs-zero-copy threshold; tunable via kern.skywalk.sk_copy_thres sysctl */
size_t sk_copy_thres = SK_COPY_THRES;
#endif /* DEVELOPMENT || DEBUG */
/*
 * Compile-time feature bits for this kernel, exposed read-only through the
 * kern.skywalk.features sysctl so userspace can discover what was built in.
 */
uint64_t sk_features =
#if SKYWALK
    SK_FEATURE_SKYWALK |
#endif
#if DEVELOPMENT
    SK_FEATURE_DEVELOPMENT |
#endif
#if DEBUG
    SK_FEATURE_DEBUG |
#endif
#if CONFIG_NEXUS_FLOWSWITCH
    SK_FEATURE_NEXUS_FLOWSWITCH |
#endif
#if CONFIG_NEXUS_NETIF
    SK_FEATURE_NEXUS_NETIF |
#endif
#if CONFIG_NEXUS_USER_PIPE
    SK_FEATURE_NEXUS_USER_PIPE |
#endif
#if CONFIG_NEXUS_KERNEL_PIPE
    SK_FEATURE_NEXUS_KERNEL_PIPE |
#endif
#if CONFIG_NEXUS_KERNEL_PIPE && (DEVELOPMENT || DEBUG)
    SK_FEATURE_NEXUS_KERNEL_PIPE_LOOPBACK |
#endif
#if (DEVELOPMENT || DEBUG)
    SK_FEATURE_DEV_OR_DEBUG |
#endif
    0;
88 
uint32_t sk_opp_defunct = 0;    /* opportunistic defunct */

/* checksum offload is generic to any nexus (not specific to flowswitch) */
uint32_t sk_cksum_tx = 1;       /* advertise outbound offload */
uint32_t sk_cksum_rx = 1;       /* perform inbound checksum offload */

/* guard pages */
uint32_t sk_guard = 0;          /* guard pages (0: disable) */
#define SK_GUARD_MIN    1       /* minimum # of guard pages */
#define SK_GUARD_MAX    4       /* maximum # of guard pages */
uint32_t sk_headguard_sz = SK_GUARD_MIN; /* # of leading guard pages */
uint32_t sk_tailguard_sz = SK_GUARD_MIN; /* # of trailing guard pages */

#if (DEVELOPMENT || DEBUG)
/* ring-size overrides; 0 means "use the built-in default" */
uint32_t sk_txring_sz = 0;      /* flowswitch */
uint32_t sk_rxring_sz = 0;      /* flowswitch */
uint32_t sk_net_txring_sz = 0;  /* netif adapter */
uint32_t sk_net_rxring_sz = 0;  /* netif adapter */
uint32_t sk_min_pool_size = 0;  /* minimum packet pool size */
#endif /* DEVELOPMENT || DEBUG */

/* flow-advisory table size; clamped to [1, NX_FLOWADV_MAX] in skywalk_init() */
uint32_t sk_max_flows = NX_FLOWADV_DEFAULT;
uint32_t sk_fadv_nchunks;       /* # of FO_FLOWADV_CHUNK in bitmap */
uint32_t sk_netif_compat_txmodel = NETIF_COMPAT_TXMODEL_DEFAULT;
uint32_t sk_netif_native_txmodel = NETIF_NATIVE_TXMODEL_DEFAULT;
/*
 * Configures the RX aggregation logic for TCP in flowswitch.
 * A non-zero value enables the aggregation logic, with the maximum
 * aggregation length (in bytes) limited to this value.
 *
 * DO NOT increase beyond 16KB. If you do, we end up corrupting the data-stream
 * as we create aggregate-mbufs with a pktlen > 16KB but only a single element.
 */
uint32_t sk_fsw_rx_agg_tcp = 16384;

/*
 * Forces the RX host path to use or not use aggregation, regardless of the
 * existence of filters (see sk_fsw_rx_agg_tcp_host_t for valid values).
 */
uint32_t sk_fsw_rx_agg_tcp_host = SK_FSW_RX_AGG_TCP_HOST_AUTO;

/*
 * Configures the skywalk infrastructure for handling TCP TX aggregation.
 * A non-zero value enables the support.
 */
uint32_t sk_fsw_tx_agg_tcp = 1;
/*
 * Configuration to limit the number of buffers for flowswitch VP channel.
 */
uint32_t sk_fsw_max_bufs = 0;
/*
 * GSO MTU for the channel path:
 *   > 0: enable GSO and use value as the largest supported segment size
 *  == 0: disable GSO
 */
uint32_t sk_fsw_gso_mtu = 16 * 1024;

/* list of interfaces that allow direct open from userspace */
#define SK_NETIF_DIRECT_MAX     8
char sk_netif_direct[SK_NETIF_DIRECT_MAX][IFXNAMSIZ];
uint32_t sk_netif_direct_cnt = 0;

uint16_t sk_tx_delay_qlen = 16;                 /* packets */
uint16_t sk_tx_delay_timeout = (1 * 1000);        /* microseconds */

/* compat-netif ring sizes for auxiliary cellular interfaces */
#define SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ     64
#define SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ     64
uint32_t sk_netif_compat_aux_cell_tx_ring_sz =
    SK_NETIF_COMPAT_AUX_CELL_TX_RING_SZ;
uint32_t sk_netif_compat_aux_cell_rx_ring_sz =
    SK_NETIF_COMPAT_AUX_CELL_RX_RING_SZ;

/* Wi-Fi Access Point */
#define SK_NETIF_COMPAT_WAP_TX_RING_SZ  128
#define SK_NETIF_COMPAT_WAP_RX_RING_SZ  128
uint32_t sk_netif_compat_wap_tx_ring_sz = SK_NETIF_COMPAT_WAP_TX_RING_SZ;
uint32_t sk_netif_compat_wap_rx_ring_sz = SK_NETIF_COMPAT_WAP_RX_RING_SZ;

/* AWDL */
#define SK_NETIF_COMPAT_AWDL_TX_RING_SZ 128
#define SK_NETIF_COMPAT_AWDL_RX_RING_SZ 128
uint32_t sk_netif_compat_awdl_tx_ring_sz = SK_NETIF_COMPAT_AWDL_TX_RING_SZ;
uint32_t sk_netif_compat_awdl_rx_ring_sz = SK_NETIF_COMPAT_AWDL_RX_RING_SZ;

/* Wi-Fi Infrastructure */
#define SK_NETIF_COMPAT_WIF_TX_RING_SZ  128
#define SK_NETIF_COMPAT_WIF_RX_RING_SZ  128
uint32_t sk_netif_compat_wif_tx_ring_sz = SK_NETIF_COMPAT_WIF_TX_RING_SZ;
uint32_t sk_netif_compat_wif_rx_ring_sz = SK_NETIF_COMPAT_WIF_RX_RING_SZ;

/* USB Ethernet */
#define SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ      128
#define SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ      128
uint32_t sk_netif_compat_usb_eth_tx_ring_sz =
    SK_NETIF_COMPAT_USB_ETH_TX_RING_SZ;
uint32_t sk_netif_compat_usb_eth_rx_ring_sz =
    SK_NETIF_COMPAT_USB_ETH_RX_RING_SZ;

#define SK_NETIF_COMPAT_RX_MBQ_LIMIT    8192
int sk_netif_compat_rx_mbq_limit = SK_NETIF_COMPAT_RX_MBQ_LIMIT;

/* TX/RX mitigation modes; clamped to SK_NETIF_MIT_MAX in skywalk_init() */
uint32_t sk_netif_tx_mit = SK_NETIF_MIT_AUTO;
uint32_t sk_netif_rx_mit = SK_NETIF_MIT_AUTO;
/* interface-name prefix for low-latency interfaces (sk_ll_prefix boot-arg) */
char sk_ll_prefix[IFNAMSIZ] = "llw";
uint32_t sk_channel_buflet_alloc = 0;
uint32_t sk_netif_queue_stat_enable = 0;
194 
/* sysctl tree: kern.skywalk and kern.skywalk.stats */
SYSCTL_NODE(_kern, OID_AUTO, skywalk, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk parameters");
SYSCTL_NODE(_kern_skywalk, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk statistics");

SYSCTL_OPAQUE(_kern_skywalk, OID_AUTO, features, CTLFLAG_RD | CTLFLAG_LOCKED,
    &sk_features, sizeof(sk_features), "-", "Skywalk features");

SYSCTL_QUAD(_kern_skywalk, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_verbose, "Skywalk verbose mode");

#if (DEVELOPMENT || DEBUG)
SYSCTL_LONG(_kern_skywalk, OID_AUTO, sk_copy_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_copy_thres, "Skywalk copy threshold");
/* when 0, sk_priv_chk() ignores privilege-check failures (DEV/DEBUG only) */
static int __priv_check = 1;
SYSCTL_INT(_kern_skywalk, OID_AUTO, priv_check, CTLFLAG_RW | CTLFLAG_LOCKED,
    &__priv_check, 0, "Skywalk privilege check");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_opp_defunct, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_opp_defunct, 0, "Defunct opportunistically");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_tx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_tx, 0, "Advertise (and perform) outbound checksum offload");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, sk_cksum_rx, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sk_cksum_rx, 0, "Perform inbound checksum offload");

uint32_t sk_inject_error_rmask = 0x3;
/*
 * NOTE(review): the literal 0x3 below occupies the sysctl "default value"
 * slot, which is only consulted when the pointer argument is NULL; 0 is
 * the usual convention here — harmless, but confirm before changing.
 */
SYSCTL_UINT(_kern_skywalk, OID_AUTO, inject_error_rmask,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sk_inject_error_rmask, 0x3, "");

static void skywalk_self_tests(void);
#endif /* (DEVELOPMENT || DEBUG) */

/* allocation tags for memory accounting of sysctl buffers and skoids */
#define SKMEM_TAG_SYSCTL_BUF "com.apple.skywalk.sysctl_buf"
SKMEM_TAG_DEFINE(skmem_tag_sysctl_buf, SKMEM_TAG_SYSCTL_BUF);

#define SKMEM_TAG_OID       "com.apple.skywalk.skoid"
SKMEM_TAG_DEFINE(skmem_tag_oid, SKMEM_TAG_OID);
231 
#if (SK_LOG || DEVELOPMENT || DEBUG)
#define SKMEM_TAG_DUMP  "com.apple.skywalk.dump"
static SKMEM_TAG_DEFINE(skmem_tag_dump, SKMEM_TAG_DUMP);

/*
 * Scratch buffer used by sk_dump(); allocated once in skywalk_init() and
 * released in skywalk_fini().  sk_dump_buf_size tracks the allocation so
 * the __sized_by annotation stays accurate.
 */
static uint32_t sk_dump_buf_size;
static char *__sized_by(sk_dump_buf_size) sk_dump_buf;
#define SK_DUMP_BUF_ALIGN       16
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

/* os_log handle for the skywalk subsystem; created in skywalk_init() */
os_log_t sk_log_handle;
242 
/*
 * Startup-time helper for SKMEM_TAG_DEFINE(): allocates the kernel
 * allocation-name handle for a tag spec and stores it in the spec's
 * destination variable.
 */
__startup_func
void
__sk_tag_make(const struct sk_tag_spec *spec)
{
	*spec->skt_var = kern_allocation_name_allocate(spec->skt_name, 0);
}
249 
250 boolean_t
skywalk_netif_direct_enabled(void)251 skywalk_netif_direct_enabled(void)
252 {
253 	return sk_netif_direct_cnt > 0;
254 }
255 
256 boolean_t
skywalk_netif_direct_allowed(const char * ifname)257 skywalk_netif_direct_allowed(const char *ifname)
258 {
259 	uint32_t i;
260 
261 	for (i = 0; i < sk_netif_direct_cnt; i++) {
262 		if (strlcmp(sk_netif_direct[i], ifname, IFXNAMSIZ) == 0) {
263 			return TRUE;
264 		}
265 	}
266 	return FALSE;
267 }
268 
#if (DEVELOPMENT || DEBUG)
/*
 * Parses the "sk_netif_direct" boot-arg, a comma-separated list of
 * interface names, into the sk_netif_direct[] table (at most
 * SK_NETIF_DIRECT_MAX entries).  Names that are empty or too long
 * (>= IFXNAMSIZ) are silently skipped.
 */
static void
parse_netif_direct(void)
{
	char buf[(IFXNAMSIZ + 1) * SK_NETIF_DIRECT_MAX];
	size_t i, curr, len, iflen;

	if (!PE_parse_boot_arg_str("sk_netif_direct", buf, sizeof(buf))) {
		return;
	}

	curr = 0;
	len = strbuflen(buf);
	/* scan one past the end so the final (NUL-terminated) token is taken */
	for (i = 0; i < len + 1 &&
	    sk_netif_direct_cnt < SK_NETIF_DIRECT_MAX; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			continue;
		}

		/* terminate the current token in place, then copy it out */
		buf[i] = '\0';
		iflen = i - curr;
		if (iflen > 0 && iflen < IFXNAMSIZ) {
			(void) strbufcpy(sk_netif_direct[sk_netif_direct_cnt],
			    IFXNAMSIZ, buf + curr, IFXNAMSIZ);
			sk_netif_direct_cnt++;
		}
		curr = i + 1;   /* next token starts after the separator */
	}
}
#endif /* DEVELOPMENT || DEBUG */
299 
/*
 * Tears down the Skywalk subsystem; called with sk_lock held, either from
 * skywalk_init() on partial-init failure or at subsystem shutdown.
 * Teardown order is the reverse of the bring-up order in skywalk_init()
 * (channel, nexus, skmem), so keep the two functions in sync.
 */
static void
skywalk_fini(void)
{
	SK_LOCK_ASSERT_HELD();

	if (__sk_inited) {
#if (DEVELOPMENT || DEBUG)
		skmem_test_fini();
		cht_test_fini();
#endif /* (DEVELOPMENT || DEBUG) */
		channel_fini();
		nexus_fini();
		skmem_fini();
		flowidns_fini();

#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* release the sk_dump() scratch buffer allocated at init */
		if (sk_dump_buf != NULL) {
			sk_free_data_sized_by(sk_dump_buf, sk_dump_buf_size);
			sk_dump_buf = NULL;
			sk_dump_buf_size = 0;
		}
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		__sk_inited = 0;
	}
}
326 
/*
 * One-time bring-up of the Skywalk subsystem:
 *  1. (DEV/DEBUG) seed tunables from boot-args;
 *  2. sanitize/clamp tunables (flow count, mitigation modes, guard pages);
 *  3. initialize skmem, nexus and channel layers under sk_lock, unwinding
 *     via skywalk_fini() if any step fails;
 *  4. on success, set up namespaces and (DEV/DEBUG) run self-tests.
 * Returns 0 on success or the error from nexus/channel init.
 */
int
skywalk_init(void)
{
	int error;

	VERIFY(!__sk_inited);

	static_assert(sizeof(kern_packet_t) == sizeof(uint64_t));
	static_assert(sizeof(bitmap_t) == sizeof(uint64_t));

	sk_log_handle = os_log_create("com.apple.xnu", "skywalk");

#if (DEVELOPMENT || DEBUG)
	/* boot-arg overrides are honored only on DEV/DEBUG kernels */
	PE_parse_boot_argn("sk_verbose", &sk_verbose, sizeof(sk_verbose));
	(void) PE_parse_boot_argn("sk_opp_defunct", &sk_opp_defunct,
	    sizeof(sk_opp_defunct));
	(void) PE_parse_boot_argn("sk_cksum_tx", &sk_cksum_tx,
	    sizeof(sk_cksum_tx));
	(void) PE_parse_boot_argn("sk_cksum_rx", &sk_cksum_rx,
	    sizeof(sk_cksum_rx));
	(void) PE_parse_boot_argn("sk_txring_sz", &sk_txring_sz,
	    sizeof(sk_txring_sz));
	(void) PE_parse_boot_argn("sk_rxring_sz", &sk_rxring_sz,
	    sizeof(sk_rxring_sz));
	(void) PE_parse_boot_argn("sk_net_txring_sz", &sk_net_txring_sz,
	    sizeof(sk_net_txring_sz));
	(void) PE_parse_boot_argn("sk_net_rxring_sz", &sk_net_rxring_sz,
	    sizeof(sk_net_rxring_sz));
	(void) PE_parse_boot_argn("sk_max_flows", &sk_max_flows,
	    sizeof(sk_max_flows));
	(void) PE_parse_boot_argn("sk_native_txmodel", &sk_netif_native_txmodel,
	    sizeof(sk_netif_native_txmodel));
	(void) PE_parse_boot_argn("sk_compat_txmodel", &sk_netif_compat_txmodel,
	    sizeof(sk_netif_compat_txmodel));
	(void) PE_parse_boot_argn("sk_tx_delay_qlen", &sk_tx_delay_qlen,
	    sizeof(sk_tx_delay_qlen));
	/*
	 * NOTE(review): boot-arg name "sk_ts_delay_timeout" looks like a
	 * typo of "sk_tx_delay_timeout" (it fills sk_tx_delay_timeout).
	 * The name is external interface, so confirm existing users before
	 * renaming it.
	 */
	(void) PE_parse_boot_argn("sk_ts_delay_timeout", &sk_tx_delay_timeout,
	    sizeof(sk_tx_delay_timeout));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_tx_ring_sz",
	    &sk_netif_compat_aux_cell_tx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_aux_cell_rx_ring_sz",
	    &sk_netif_compat_aux_cell_rx_ring_sz,
	    sizeof(sk_netif_compat_aux_cell_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_tx_ring_sz",
	    &sk_netif_compat_wap_tx_ring_sz,
	    sizeof(sk_netif_compat_wap_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wap_rx_ring_sz",
	    &sk_netif_compat_wap_rx_ring_sz,
	    sizeof(sk_netif_compat_wap_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_tx_ring_sz",
	    &sk_netif_compat_awdl_tx_ring_sz,
	    sizeof(sk_netif_compat_awdl_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_awdl_rx_ring_sz",
	    &sk_netif_compat_awdl_rx_ring_sz,
	    sizeof(sk_netif_compat_awdl_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_tx_ring_sz",
	    &sk_netif_compat_wif_tx_ring_sz,
	    sizeof(sk_netif_compat_wif_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_wif_rx_ring_sz",
	    &sk_netif_compat_wif_rx_ring_sz,
	    sizeof(sk_netif_compat_wif_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_tx_ring_sz",
	    &sk_netif_compat_usb_eth_tx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_tx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_usb_eth_rx_ring_sz",
	    &sk_netif_compat_usb_eth_rx_ring_sz,
	    sizeof(sk_netif_compat_usb_eth_rx_ring_sz));
	(void) PE_parse_boot_argn("sk_compat_rx_mbq_limit",
	    &sk_netif_compat_rx_mbq_limit, sizeof(sk_netif_compat_rx_mbq_limit));
	(void) PE_parse_boot_argn("sk_netif_tx_mit",
	    &sk_netif_tx_mit, sizeof(sk_netif_tx_mit));
	(void) PE_parse_boot_argn("sk_netif_rx_mit",
	    &sk_netif_rx_mit, sizeof(sk_netif_rx_mit));
	(void) PE_parse_boot_arg_str("sk_ll_prefix", sk_ll_prefix,
	    sizeof(sk_ll_prefix));
	(void) PE_parse_boot_argn("sk_netif_q_stats", &sk_netif_queue_stat_enable,
	    sizeof(sk_netif_queue_stat_enable));
	parse_netif_direct();
	(void) PE_parse_boot_argn("sk_fsw_rx_agg_tcp", &sk_fsw_rx_agg_tcp,
	    sizeof(sk_fsw_rx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_tx_agg_tcp", &sk_fsw_tx_agg_tcp,
	    sizeof(sk_fsw_tx_agg_tcp));
	(void) PE_parse_boot_argn("sk_fsw_gso_mtu", &sk_fsw_gso_mtu,
	    sizeof(sk_fsw_gso_mtu));
	(void) PE_parse_boot_argn("sk_fsw_max_bufs", &sk_fsw_max_bufs,
	    sizeof(sk_fsw_max_bufs));
	(void) PE_parse_boot_argn("sk_guard", &sk_guard, sizeof(sk_guard));
	(void) PE_parse_boot_argn("sk_headguard_sz", &sk_headguard_sz,
	    sizeof(sk_headguard_sz));
	(void) PE_parse_boot_argn("sk_tailguard_sz", &sk_tailguard_sz,
	    sizeof(sk_tailguard_sz));
	(void) PE_parse_boot_argn("sk_min_pool_size", &sk_min_pool_size,
	    sizeof(sk_min_pool_size));
#endif /* DEVELOPMENT || DEBUG */

	/* clamp flow-advisory table size into [1, NX_FLOWADV_MAX] */
	if (sk_max_flows == 0) {
		sk_max_flows = NX_FLOWADV_DEFAULT;
	} else if (sk_max_flows > NX_FLOWADV_MAX) {
		sk_max_flows = NX_FLOWADV_MAX;
	}

	/* clamp mitigation modes to their maximum */
	if (sk_netif_tx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_tx_mit = SK_NETIF_MIT_MAX;
	}
	if (sk_netif_rx_mit > SK_NETIF_MIT_MAX) {
		sk_netif_rx_mit = SK_NETIF_MIT_MAX;
	}

	/* number of FO_FLOWADV_CHUNK-sized words needed for the flow bitmap */
	sk_fadv_nchunks = (uint32_t)P2ROUNDUP(sk_max_flows, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	/*
	 * Guard-page sizing: when enabled, unset sizes get a randomized
	 * count; everything ends up within [SK_GUARD_MIN, SK_GUARD_MAX].
	 */
	if (sk_guard) {
		uint32_t sz;
		/* leading guard page(s) */
		if (sk_headguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_headguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_headguard_sz > SK_GUARD_MAX) {
			sk_headguard_sz = SK_GUARD_MAX;
		}
		if (sk_headguard_sz < SK_GUARD_MIN) {
			sk_headguard_sz = SK_GUARD_MIN;
		}
		/* trailing guard page(s) */
		if (sk_tailguard_sz == 0) {
			read_frandom(&sz, sizeof(sz));
			sk_tailguard_sz = (sz % (SK_GUARD_MAX + 1));
		} else if (sk_tailguard_sz > SK_GUARD_MAX) {
			sk_tailguard_sz = SK_GUARD_MAX;
		}
		if (sk_tailguard_sz < SK_GUARD_MIN) {
			sk_tailguard_sz = SK_GUARD_MIN;
		}
	} else {
		sk_headguard_sz = sk_tailguard_sz = SK_GUARD_MIN;
	}
	ASSERT(sk_headguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_headguard_sz <= SK_GUARD_MAX);
	ASSERT(sk_tailguard_sz >= SK_GUARD_MIN);
	ASSERT(sk_tailguard_sz <= SK_GUARD_MAX);

	/* set before bring-up so skywalk_fini() will unwind on failure */
	__sk_inited = 1;

	SK_LOCK();
	skmem_init();
	error = nexus_init();
	if (error == 0) {
		error = channel_init();
	}
	if (error != 0) {
		skywalk_fini();
	}
	SK_UNLOCK();

	if (error == 0) {
#if (SK_LOG || DEVELOPMENT || DEBUG)
		/* allocate space for sk_dump_buf */
		sk_dump_buf = sk_alloc_data(SK_DUMP_BUF_SIZE, Z_WAITOK | Z_NOFAIL,
		    skmem_tag_dump);
		sk_dump_buf_size = SK_DUMP_BUF_SIZE;
#endif /* (SK_LOG || DEVELOPMENT || DEBUG) */

		netns_init();
		protons_init();
		flowidns_init();

#if (DEVELOPMENT || DEBUG)
		skywalk_self_tests();
		skmem_test_init();
		cht_test_init();
#endif /* DEVELOPMENT || DEBUG */
	}

	return error;
}
503 
504 /*
505  * csproc_get_platform_binary() returns non-zero if the process is platform
506  * code, which means that it is considered part of the Operating System.
507  * On iOS, that means it's contained in the trust cache or a loaded one.
508  * On macOS, everything signed by B&I is currently platform code, but the
509  * policy in general is subject to change.  Thus this is an approximate.
510  */
511 boolean_t
skywalk_check_platform_binary(proc_t p)512 skywalk_check_platform_binary(proc_t p)
513 {
514 	return (csproc_get_platform_binary(p) == 0) ? FALSE : TRUE;
515 }
516 
/*
 * Core privilege check: returns 0 if the credential holds the given
 * Skywalk privilege, else an error from priv_check_cred() (EPERM when
 * cred is NULL).  On DEV/DEBUG kernels a failure is logged with the
 * corresponding entitlement name, and may be ignored entirely when the
 * kern.skywalk.priv_check sysctl (__priv_check) is 0.
 */
static int
sk_priv_chk(proc_t p, kauth_cred_t cred, int priv)
{
#pragma unused(p)
	int ret = EPERM;

	if (cred != NULL) {
		ret = priv_check_cred(cred, priv, 0);
	}
#if (DEVELOPMENT || DEBUG)
	if (ret != 0) {
		/* map the privilege code to its entitlement string for logging */
		const char *pstr;

		switch (priv) {
		case PRIV_SKYWALK_REGISTER_USER_PIPE:
			pstr = "com.apple.private.skywalk.register-user-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_KERNEL_PIPE:
			pstr = "com.apple.private.skywalk.register-kernel-pipe";
			break;

		case PRIV_SKYWALK_REGISTER_NET_IF:
			pstr = "com.apple.private.skywalk.register-net-if";
			break;

		case PRIV_SKYWALK_REGISTER_FLOW_SWITCH:
			pstr = "com.apple.private.skywalk.register-flow-switch";
			break;

		case PRIV_SKYWALK_OBSERVE_ALL:
			pstr = "com.apple.private.skywalk.observe-all";
			break;

		case PRIV_SKYWALK_OBSERVE_STATS:
			pstr = "com.apple.private.skywalk.observe-stats";
			break;

		case PRIV_SKYWALK_LOW_LATENCY_CHANNEL:
			pstr = "com.apple.private.skywalk.low-latency-channel";
			break;

		default:
			pstr = "unknown";
			break;
		}

#if SK_LOG
		if (__priv_check) {
			SK_DF(SK_VERB_PRIV, "%s(%d) insufficient privilege %d "
			    "(\"%s\") err %d", sk_proc_name(p),
			    sk_proc_pid(p), priv, pstr, ret);
		} else {
			SK_DF(SK_VERB_PRIV, "%s(%d) IGNORING missing privilege "
			    "%d (\"%s\") err %d", sk_proc_name(p),
			    sk_proc_pid(p), priv, pstr, ret);
		}
#endif /* SK_LOG */

		/* ignore privilege check failures if requested */
		if (!__priv_check) {
			ret = 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

	return ret;
}
585 
586 int
skywalk_priv_check_cred(proc_t p,kauth_cred_t cred,int priv)587 skywalk_priv_check_cred(proc_t p, kauth_cred_t cred, int priv)
588 {
589 	return sk_priv_chk(p, cred, priv);
590 }
591 
#if CONFIG_MACF
/*
 * MAC-framework check of a process credential against the given
 * system-info type; takes and drops its own credential reference.
 */
int
skywalk_mac_system_check_proc_cred(proc_t p, const char *info_type)
{
	kauth_cred_t cred;
	int err;

	cred = kauth_cred_proc_ref(p);
	err = mac_system_check_info(cred, info_type);
	kauth_cred_unref(&cred);

	return err;
}
#endif /* CONFIG_MACF */
604 
605 /*
606  * Scan thru the list of privileges needed before we allow someone
607  * to open a handle to the Nexus controller.  This should be done
608  * at nxctl_create() time, and additional privilege check specific
609  * to the operation (e.g. register, etc.) should be done afterwards.
610  */
611 int
skywalk_nxctl_check_privileges(proc_t p,kauth_cred_t cred)612 skywalk_nxctl_check_privileges(proc_t p, kauth_cred_t cred)
613 {
614 	int ret = 0;
615 
616 	if (p == kernproc) {
617 		goto done;
618 	}
619 
620 	do {
621 		/*
622 		 * Check for observe-{stats,all} entitlements first
623 		 * before the rest, to account for nexus controller
624 		 * clients that don't need anything but statistics;
625 		 * it would help quiesce sandbox violation warnings.
626 		 */
627 		if ((ret = sk_priv_chk(p, cred,
628 		    PRIV_SKYWALK_OBSERVE_STATS)) == 0) {
629 			break;
630 		}
631 		if ((ret = sk_priv_chk(p, cred,
632 		    PRIV_SKYWALK_OBSERVE_ALL)) == 0) {
633 			break;
634 		}
635 		if ((ret = sk_priv_chk(p, cred,
636 		    PRIV_SKYWALK_REGISTER_USER_PIPE)) == 0) {
637 			break;
638 		}
639 		if ((ret = sk_priv_chk(p, cred,
640 		    PRIV_SKYWALK_REGISTER_KERNEL_PIPE)) == 0) {
641 			break;
642 		}
643 		if ((ret = sk_priv_chk(p, cred,
644 		    PRIV_SKYWALK_REGISTER_NET_IF)) == 0) {
645 			break;
646 		}
647 		if ((ret = sk_priv_chk(p, cred,
648 		    PRIV_SKYWALK_REGISTER_FLOW_SWITCH)) == 0) {
649 			break;
650 		}
651 		/* none set, so too bad */
652 		ret = EPERM;
653 	} while (0);
654 
655 #if (DEVELOPMENT || DEBUG)
656 	if (ret != 0) {
657 		SK_ERR("%s(%d) insufficient privilege to open nexus controller "
658 		    "err %d", sk_proc_name(p), sk_proc_pid(p), ret);
659 	}
660 #endif /* !DEVELOPMENT && !DEBUG */
661 done:
662 	return ret;
663 }
664 
/*
 * Generates a guard identifier for a channel (isch == TRUE) or nexus
 * handle: a 64-bit value assembled from 16 random bits, 16 bits taken
 * from the start of the given UUID, a 'CH'/'NX' discriminator, and a
 * fixed 'SK' signature — laid out in little-endian 16-bit lanes.
 * The result is copied into *guard (truncated if guardid_t is smaller
 * than 64 bits).
 */
void
sk_gen_guard_id(boolean_t isch, const uuid_t uuid, guardid_t *guard)
{
#define GUARD_CH_SIG    0x4348  /* 'CH' */
#define GUARD_NX_SIG    0x4e58  /* 'NX' */
	union {
		uint8_t         _u8[8];
		uint16_t        _u16[4];
		uint64_t        _u64;
	} __u;

	read_random(&__u._u16[0], sizeof(uint16_t));
	/* only the first 2 bytes of the UUID are folded in */
	bcopy(uuid, (void *)&__u._u16[1], sizeof(uint16_t));
	__u._u16[2] = htole16(isch ? GUARD_CH_SIG : GUARD_NX_SIG);
	__u._u16[3] = htole16(0x534b);  /* 'SK' */
	/* 'SK' lane is constant and nonzero, so the whole value is nonzero */
	VERIFY(__u._u64 != 0);

	bzero(guard, sizeof(*guard));
	bcopy((void *)&__u._u64, guard, MIN(sizeof(*guard),
	    sizeof(uint64_t)));
}
686 
687 
/*
 * Formats a UUID as an upper-case string into the caller-supplied
 * uuid_string_t buffer and returns that buffer (convenient for use
 * directly inside logging expressions).
 */
extern char *
__counted_by(sizeof(uuid_string_t))
sk_uuid_unparse(const uuid_t uu, uuid_string_t out)
{
	uuid_unparse_upper(uu, out);
	return out;
}
695 
696 #if SK_LOG
/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len
 *
 * Formats a classic hexdump (offset, 16 hex byte pairs, printable ASCII
 * column) of the first MIN(len, dumplen) bytes of obj into the shared
 * sk_dump_buf, and returns that buffer.  Not reentrant: all callers
 * share sk_dump_buf, so this is for serialized logging only.
 *
 * @param label
 *   prefix written on the header line.
 * @param obj
 *   buffer to be dumped.
 * @param len
 *   buffer's total length.
 * @param dumplen
 *   length to be dumped.
 */
const char *
__counted_by(SK_DUMP_BUF_SIZE)
sk_dump(const char *label, const void *__sized_by(len) obj, int len, int dumplen)
{
	int i, j, i0, n = 0;
	static char hex[] = "0123456789abcdef";
	const char *p = obj;    /* dump cursor */
	uint32_t size;
	char *__sized_by(size) o;        /* output position */
	const int lim = SK_DUMP_BUF_SIZE;
	char* __counted_by(lim) dst = sk_dump_buf;


/* high/low nibble to hex digit; non-printables render as '.' */
#define P_HI(x) hex[((x) & 0xf0) >> 4]
#define P_LO(x) hex[((x) & 0xf)]
#define P_C(x)  ((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')

	dumplen = MIN(len, dumplen);
	o = dst;
	size = lim;
	n = scnprintf(o, lim, "%s %p len %d lim %d\n", label,
	    SK_KVA(p), len, lim);
	o += strbuflen(o, n);
	size -= n;
	/* hexdump routine */
	for (i = 0; i < dumplen;) {
		n = scnprintf(o, size, "%5d: ", i);
		o += n;
		size -= n;
		/* pre-blank the 48-char hex area (16 x "xx ") */
		memset(o, ' ', 48);
		i0 = i;
		/* hex pairs for up to 16 bytes of this row */
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j * 3] = P_HI(p[i]);
			o[j * 3 + 1] = P_LO(p[i]);
		}
		/* rewind and emit the ASCII column for the same bytes */
		i = i0;
		for (j = 0; j < 16 && i < dumplen; i++, j++) {
			o[j + 48] = P_C(p[i]);
		}
		o[j + 48] = '\n';
		o += j + 49;
		size -= (j + 49);
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
757 
758 /*
759  * "Safe" variant of proc_name_address(), meant to be used only for logging.
760  */
761 const char *
sk_proc_name(struct proc * p)762 sk_proc_name(struct proc *p)
763 {
764 	if (p == PROC_NULL) {
765 		return "proc_null";
766 	}
767 
768 	return proc_name_address(p);
769 }
770 
771 /*
772  * "Safe" variant of proc_pid(), mean to be used only for logging.
773  */
774 int
sk_proc_pid(struct proc * p)775 sk_proc_pid(struct proc *p)
776 {
777 	if (p == PROC_NULL) {
778 		return -1;
779 	}
780 
781 	return proc_pid(p);
782 }
783 
784 const char *
sk_ntop(int af,const void * addr,char * __counted_by (addr_strlen)addr_str,size_t addr_strlen)785 sk_ntop(int af, const void *addr, char *__counted_by(addr_strlen)addr_str,
786     size_t addr_strlen)
787 {
788 	const char *__null_terminated str = NULL;
789 
790 	addr_str[0] = '\0';
791 
792 	if (inp_log_privacy != 0) {
793 		switch (af) {
794 		case AF_INET:
795 			strlcpy(addr_str, "<IPv4-redacted>", addr_strlen);
796 			break;
797 		case AF_INET6:
798 			strlcpy(addr_str, "<IPv6-redacted>", addr_strlen);
799 			break;
800 		default:
801 			VERIFY(0);
802 			__builtin_unreachable();
803 		}
804 		str = __unsafe_null_terminated_from_indexable(addr_str);
805 	} else {
806 		str = inet_ntop(af, addr, addr_str, (socklen_t)addr_strlen);
807 	}
808 
809 	return str;
810 }
811 
812 const char *
sk_sa_ntop(struct sockaddr * sa,char * __counted_by (addr_strlen)addr_str,size_t addr_strlen)813 sk_sa_ntop(struct sockaddr *sa, char *__counted_by(addr_strlen)addr_str,
814     size_t addr_strlen)
815 {
816 	const char *__null_terminated str = NULL;
817 
818 	addr_str[0] = '\0';
819 
820 	switch (sa->sa_family) {
821 	case AF_INET:
822 		str = sk_ntop(AF_INET, &SIN(sa)->sin_addr.s_addr,
823 		    addr_str, (socklen_t)addr_strlen);
824 		break;
825 
826 	case AF_INET6:
827 		str = sk_ntop(AF_INET6, &SIN6(sa)->sin6_addr,
828 		    addr_str, (socklen_t)addr_strlen);
829 		break;
830 
831 	default:
832 		str = __unsafe_null_terminated_from_indexable(addr_str);
833 		break;
834 	}
835 
836 	return str;
837 }
838 #endif /* SK_LOG */
839 
840 bool
sk_sa_has_addr(struct sockaddr * sa)841 sk_sa_has_addr(struct sockaddr *sa)
842 {
843 	switch (sa->sa_family) {
844 	case AF_INET:
845 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
846 		return SIN(sa)->sin_addr.s_addr != INADDR_ANY;
847 	case AF_INET6:
848 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
849 		return !IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr);
850 	default:
851 		return false;
852 	}
853 }
854 
855 bool
sk_sa_has_port(struct sockaddr * sa)856 sk_sa_has_port(struct sockaddr *sa)
857 {
858 	switch (sa->sa_family) {
859 	case AF_INET:
860 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
861 		return SIN(sa)->sin_port != 0;
862 	case AF_INET6:
863 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
864 		return SIN6(sa)->sin6_port != 0;
865 	default:
866 		return false;
867 	}
868 }
869 
870 /* returns port number in host byte order */
871 uint16_t
sk_sa_get_port(struct sockaddr * sa)872 sk_sa_get_port(struct sockaddr *sa)
873 {
874 	switch (sa->sa_family) {
875 	case AF_INET:
876 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in));
877 		return ntohs(SIN(sa)->sin_port);
878 	case AF_INET6:
879 		ASSERT(sa->sa_len == sizeof(struct sockaddr_in6));
880 		return ntohs(SIN6(sa)->sin6_port);
881 	default:
882 		VERIFY(0);
883 		/* NOTREACHED */
884 		__builtin_unreachable();
885 	}
886 }
887 
/*
 * Terminates a misbehaving userspace process with SIGABRT, attaching an
 * OS_REASON_SKYWALK exit reason so a crash report is generated.  Never
 * valid for kernproc.  If the reason cannot be allocated, the signal is
 * still delivered (with OS_REASON_NULL); psignal_try_thread_with_reason()
 * consumes the reason reference either way.
 */
void
skywalk_kill_process(struct proc *p, uint64_t reason_code)
{
	os_reason_t exit_reason = OS_REASON_NULL;

	VERIFY(p != kernproc);

	exit_reason = os_reason_create(OS_REASON_SKYWALK, reason_code);
	if (exit_reason == OS_REASON_NULL) {
		SK_ERR("%s(%d) unable to allocate memory for crash reason "
		    "0x%llX", sk_proc_name(p), sk_proc_pid(p),
		    reason_code);
	} else {
		exit_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
		SK_ERR("%s(%d) aborted for reason 0x%llX",
		    sk_proc_name(p), sk_proc_pid(p), reason_code);
	}

	psignal_try_thread_with_reason(p, current_thread(), SIGABRT,
	    exit_reason);
}
909 
#if (DEVELOPMENT || DEBUG)
#define SK_MEMCMP_LEN 256               /* length of each section */
#define SK_MASK_MAXLEN 80               /* maximum mask length */

/*
 * Self-test helper: verifies that both the optimized and the scalar
 * masked-compare of `l` bytes agree with the reference implementation
 * (compared over `lr` bytes) for buffers hdr1/hdr2 with mask t##_m;
 * panics on any divergence.  Expects hdr1, hdr2 and loop index i in
 * the caller's scope.
 */
#define SK_MEMCMP_MASK_VERIFY(t, l, lr) do {                            \
	static_assert(sizeof(t##_m) == SK_MASK_MAXLEN);                      \
	if ((sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) ^           \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l " at i=%d\n", i);              \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if ((sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) ^  \
	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, lr) != 0)) {    \
	        panic_plain("\nbroken: " #t " using "                   \
	            "sk_memcmp_mask_" #l "_scalar at i=%d\n", i);       \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)

/*
 * Self-test helper: asserts that the optimized and scalar masked-compares
 * both report a match for hdr1/hdr2 under mask t##_m; panics otherwise.
 */
#define SK_MEMCMP_MASK_MATCH_VERIFY(t, l) do {                          \
	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) != 0) {            \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            " mismatch (expected match) at i=%d s1=0x%x"        \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) != 0) {   \
	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
	            "_scalar mismatch (expected match) at i=%d s1=0x%x" \
	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
} while (0)
948 
949 #define SK_MEMCMP_MASK_MISMATCH_VERIFY(t, l) do {                       \
950 	if (sk_memcmp_mask_##l##B(hdr1, hdr2, t##_m) == 0) {            \
951 	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
952 	            " match (expected mismatch) at i=%d s1=0x%x"        \
953 	            " s2=0x%x\n", i, hdr1[i], hdr2[i]);                 \
954 	/* NOTREACHED */                                        \
955 	        __builtin_unreachable();                                \
956 	}                                                               \
957 	if (sk_memcmp_mask_##l##B##_scalar(hdr1, hdr2, t##_m) == 0) {   \
958 	        panic_plain("\nbroken: " #t " using sk_memcmp_mask_" #l \
959 	            "_scalar match (expected mismatch) at i=%d "        \
960 	            "s1=0x%x s2=0x%x\n", i, hdr1[i], hdr2[i]);          \
961 	/* NOTREACHED */                                        \
962 	        __builtin_unreachable();                                \
963 	}                                                               \
964 } while (0)
965 
966 #define SK_MEMCMP_BYTEMASK_VERIFY(t) do {                               \
967 	if ((sk_memcmp_mask(hdr1, hdr2, t##_m, i) != 0) ^               \
968 	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
969 	        panic_plain("\nbroken: " #t " using "                   \
970 	            "sk_memcmp_mask at i=%d\n", i);                     \
971 	/* NOTREACHED */                                        \
972 	        __builtin_unreachable();                                \
973 	}                                                               \
974 	if ((sk_memcmp_mask_scalar(hdr1, hdr2, t##_m, i) != 0) ^        \
975 	    (skywalk_memcmp_mask_ref(hdr1, hdr2, t##_m, i) != 0)) {     \
976 	        panic_plain("\nbroken: " #t " using "                   \
977 	            "sk_memcmp_mask_scalar at i=%d\n", i);              \
978 	/* NOTREACHED */                                        \
979 	        __builtin_unreachable();                                \
980 	}                                                               \
981 } while (0)
982 
/*
 * Reference (byte-at-a-time) masked compare used to validate the
 * optimized sk_memcmp_mask* implementations.  Returns zero iff every
 * byte of src1 and src2 agrees wherever byte_mask has bits set;
 * non-zero otherwise.
 */
static inline int
skywalk_memcmp_mask_ref(const uint8_t *__sized_by(n)src1,
    const uint8_t *__sized_by(n)src2, const uint8_t *__sized_by(n)byte_mask,
    size_t n)
{
	uint32_t diff = 0;
	size_t idx = 0;

	while (idx < n) {
		/* accumulate the masked difference of this byte pair */
		uint32_t delta = (uint32_t)(src1[idx] ^ src2[idx]);
		diff |= delta & byte_mask[idx];
		idx++;
	}
	return (int)diff;
}
994 
static void
skywalk_memcmp_mask_self_tests(void)
{
	/*
	 * Boot-time self-tests for the masked memory-compare routines:
	 * every sk_memcmp_mask* variant (optimized and scalar) is checked
	 * against the byte-wise skywalk_memcmp_mask_ref() reference using
	 * the mask tables below and random data, and the flow-key mask
	 * constants are validated field by field.  Any divergence panics.
	 *
	 * Each mask table is SK_MASK_MAXLEN (80) bytes — enforced by the
	 * static_assert inside SK_MEMCMP_MASK_VERIFY().  The 0xff runs are
	 * shaped after the header/flow-key layouts the names suggest, but
	 * only the byte patterns themselves matter to the test.
	 */
	static const uint8_t ipv4_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff,
		0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t ipv6_tcp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t udp_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_all_m[] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t2_m[] = {
		0x0a, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t3_m[] = {
		0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t4_m[] = {
		0x2f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_t5_m[] = {
		0x3f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i1_m[] = {
		0x02, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i2_m[] = {
		0x07, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	static const uint8_t fk_i3_m[] = {
		0x17, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};

	/* validate flow entry mask (2-tuple) */
	static_assert(FKMASK_2TUPLE == (FKMASK_PROTO | FKMASK_SPORT));
	VERIFY(fk_mask_2tuple.fk_mask == FKMASK_2TUPLE);
	VERIFY(fk_mask_2tuple.fk_ipver == 0);
	VERIFY(fk_mask_2tuple.fk_proto == 0xff);
	VERIFY(fk_mask_2tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_2tuple.fk_dport == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_2tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_2tuple.fk_pad[0] == 0);

	/* validate flow entry mask (3-tuple: 2-tuple + ipver + src) */
	static_assert(FKMASK_3TUPLE == (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_3tuple.fk_mask == FKMASK_3TUPLE);
	VERIFY(fk_mask_3tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_3tuple.fk_proto == 0xff);
	VERIFY(fk_mask_3tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_3tuple.fk_dport == 0);
	VERIFY(fk_mask_3tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_3tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_3tuple.fk_pad[0] == 0);

	/* validate flow entry mask (4-tuple: 3-tuple + dport) */
	static_assert(FKMASK_4TUPLE == (FKMASK_3TUPLE | FKMASK_DPORT));
	VERIFY(fk_mask_4tuple.fk_mask == FKMASK_4TUPLE);
	VERIFY(fk_mask_4tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_4tuple.fk_proto == 0xff);
	VERIFY(fk_mask_4tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_4tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_4tuple.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_4tuple.fk_pad[0] == 0);

	/* validate flow entry mask (5-tuple: 4-tuple + dst) */
	static_assert(FKMASK_5TUPLE == (FKMASK_4TUPLE | FKMASK_DST));
	VERIFY(fk_mask_5tuple.fk_mask == FKMASK_5TUPLE);
	VERIFY(fk_mask_5tuple.fk_ipver == 0xff);
	VERIFY(fk_mask_5tuple.fk_proto == 0xff);
	VERIFY(fk_mask_5tuple.fk_sport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_dport == 0xffff);
	VERIFY(fk_mask_5tuple.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_5tuple.fk_pad[0] == 0);

	/* validate IP flow mask (1: proto only) */
	static_assert(FKMASK_IPFLOW1 == FKMASK_PROTO);
	VERIFY(fk_mask_ipflow1.fk_mask == FKMASK_IPFLOW1);
	VERIFY(fk_mask_ipflow1.fk_ipver == 0);
	VERIFY(fk_mask_ipflow1.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow1.fk_sport == 0);
	VERIFY(fk_mask_ipflow1.fk_dport == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_src._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow1.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow1.fk_pad[0] == 0);

	/* validate IP flow mask (2: 1 + ipver + src) */
	static_assert(FKMASK_IPFLOW2 == (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC));
	VERIFY(fk_mask_ipflow2.fk_mask == FKMASK_IPFLOW2);
	VERIFY(fk_mask_ipflow2.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow2.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow2.fk_sport == 0);
	VERIFY(fk_mask_ipflow2.fk_dport == 0);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[0] == 0);
	VERIFY(fk_mask_ipflow2.fk_dst._addr64[1] == 0);
	VERIFY(fk_mask_ipflow2.fk_pad[0] == 0);

	/* validate IP flow mask (3: 2 + dst) */
	static_assert(FKMASK_IPFLOW3 == (FKMASK_IPFLOW2 | FKMASK_DST));
	VERIFY(fk_mask_ipflow3.fk_mask == FKMASK_IPFLOW3);
	VERIFY(fk_mask_ipflow3.fk_ipver == 0xff);
	VERIFY(fk_mask_ipflow3.fk_proto == 0xff);
	VERIFY(fk_mask_ipflow3.fk_sport == 0);
	VERIFY(fk_mask_ipflow3.fk_dport == 0);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_src._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[0] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_dst._addr64[1] == 0xffffffffffffffffULL);
	VERIFY(fk_mask_ipflow3.fk_pad[0] == 0);

	VERIFY(sk_dump_buf != NULL);

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is SK_MEMCMP_LEN-bytes long,
	 * which means we need at least 16+(3*SK_MEMCMP_LEN) bytes.
	 *
	 * 1st section is s1 -> (hdr1 aligned to 16-bytes)
	 * 2nd section is s2 -> (hdr2 = hdr1 + SK_MEMCMP_LEN)
	 * 3rd section is s3 -> (mask = hdr2 + SK_MEMCMP_LEN)
	 */
	void *s1, *s2, *s3;
	uintptr_t diff;

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_MEMCMP_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	uint8_t *hdr1 = s1;
	uint8_t *hdr2 = s2;
	uint8_t *byte_m = s3;

	/* fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	kprintf("Skywalk: memcmp mask ... ");

	/*
	 * Walk a single differing byte across the mask width: at each
	 * offset i, hdr1[i] is set to 1 while hdr2[i] is still 0, every
	 * fixed-length variant is compared against the reference, and
	 * hdr2[i] is then set to 1 so the buffers agree again before the
	 * next iteration.
	 */
	int i;
	for (i = 0; i < 80; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_MASK_VERIFY(ipv4, 32, 20);
		SK_MEMCMP_MASK_VERIFY(ipv6, 64, 40);
		SK_MEMCMP_MASK_VERIFY(ipv6_tcp, 80, 64);
		SK_MEMCMP_MASK_VERIFY(tcp, 32, 24);
		SK_MEMCMP_MASK_VERIFY(udp, 16, 6);
		SK_MEMCMP_MASK_VERIFY(fk_all, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t3, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t4, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_t5, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i1, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i2, 48, 48);
		SK_MEMCMP_MASK_VERIFY(fk_i3, 48, 48);
		hdr2[i] = 1;
	}

	bzero(hdr1, SK_MEMCMP_LEN);
	bzero(hdr2, SK_MEMCMP_LEN);

	/* re-fill byte mask with random data */
	read_frandom(byte_m, SK_MEMCMP_LEN);

	/* same moving-difference walk for the variable-length byte mask */
	for (i = 0; i < SK_MEMCMP_LEN; i++) {
		hdr1[i] = 1;
		SK_MEMCMP_BYTEMASK_VERIFY(byte);
		hdr2[i] = 1;
	}

	/* fill hdr1 and hdr2 with random data */
	read_frandom(hdr1, SK_MEMCMP_LEN);
	bcopy(hdr1, hdr2, SK_MEMCMP_LEN);
	memset(byte_m, 0xff, SK_MEMCMP_LEN);

	/*
	 * hdr1 == hdr2 under an all-ones mask.  For each offset i, force
	 * hdr2[i] to a different non-zero byte and check that only the
	 * fixed lengths covering offset i see the mismatch while shorter
	 * lengths still match; clearing byte_m[i] must then restore a
	 * match at every length.  Both bytes are restored before moving
	 * to the next offset.
	 */
	for (i = 0; i < 80; i++) {
		uint8_t val = hdr2[i];
		uint8_t mval = byte_m[i];

		while (hdr2[i] == hdr1[i] || hdr2[i] == 0) {
			uint8_t newval;
			read_frandom(&newval, sizeof(newval));
			hdr2[i] = newval;
		}
		if (i < 16) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MISMATCH_VERIFY(byte, 80);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		}
		byte_m[i] = 0;
		if (i < 16) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 16);
		} else if (i < 32) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 32);
		} else if (i < 48) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 48);
		} else if (i < 64) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 64);
		} else if (i < 80) {
			SK_MEMCMP_MASK_MATCH_VERIFY(byte, 80);
		}
		hdr2[i] = val;
		byte_m[i] = mval;
	}

	kprintf("PASSED\n");
}
1362 
#define SK_COPY_LEN     128             /* length of each section */

/*
 * Reset the bcopy-reference section (s2) and the section written by
 * the routine under test (s3), and snapshot all three section
 * pointers so SK_COPY_VERIFY() can detect input-register clobbering.
 */
#define SK_COPY_PREPARE(t) do {                                         \
	bzero(s2, SK_COPY_LEN);                                         \
	bzero(s3, SK_COPY_LEN);                                         \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/*
 * Panic if any saved section pointer changed, or if the section
 * produced by the routine under test (s3) differs from the
 * bcopy/bzero reference (s2); report PASSED otherwise.
 */
#define SK_COPY_VERIFY(t) do {                                          \
	if (_s1 != s1 || _s2 != s2 || _s3 != s3) {                      \
	        panic_plain("\ninput registers clobbered: " #t "\n");   \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}                                                               \
	if (bcmp(s2, s3, SK_COPY_LEN) != 0) {                           \
	        panic_plain("\nbroken: " #t "\n");                      \
	/* NOTREACHED */                                        \
	        __builtin_unreachable();                                \
	}Â else {                                                        \
	        kprintf("PASSED\n");                                    \
	}                                                               \
} while (0)

/*
 * Like SK_COPY_PREPARE(), but pre-fill both target sections with the
 * source data so the zeroing routines have non-zero bytes to clear.
 */
#define SK_ZERO_PREPARE(t) do {                                         \
	bcopy(s1, s2, SK_COPY_LEN);                                     \
	bcopy(s1, s3, SK_COPY_LEN);                                     \
	_s1 = s1; _s2 = s2; _s3 = s3;                                   \
	kprintf("Skywalk: " #t " ... ");                                \
} while (0)

/* zero-verify shares the copy-verify logic */
#define SK_ZERO_VERIFY(t)       SK_COPY_VERIFY(t)
1395 
static void
skywalk_self_tests(void)
{
	/*
	 * Boot-time sanity tests for the optimized copy/zero routines
	 * (sk_copy*, sk_zero*): each result is compared against a
	 * bcopy()/bzero() reference over sections of sk_dump_buf.
	 * Also runs the masked-compare self-tests and pins the ktrace
	 * subclass codes.  Any failure panics.
	 */
	void *s1, *s2, *s3;
	void *_s1, *_s2, *_s3;       /* snapshots taken by SK_COPY_PREPARE() */
	uintptr_t diff;

	VERIFY(sk_dump_buf != NULL);

	/*
	 * Utilize sk_dump_buf, by splitting it into 3 sections.  Each
	 * section begins on a 128-bit boundary, and is a multiple of
	 * 64-bytes len.  A section is 128-bytes long, which means we
	 * need at least 16+(3*128) bytes.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bcopy;
	 * 3rd section is test target based on our stuff.
	 */
	static_assert(SK_COPY_LEN != 0 && (SK_COPY_LEN % 128) == 0);
	static_assert((SK_COPY_LEN % 16) == 0);
	static_assert((SK_DUMP_BUF_ALIGN % 16) == 0);
	static_assert(SK_DUMP_BUF_SIZE >= (SK_DUMP_BUF_ALIGN + (SK_COPY_LEN * 3)));

	s1 = sk_dump_buf;
	if (!IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN)) {
		diff = P2ROUNDUP(s1, SK_DUMP_BUF_ALIGN) - (uintptr_t)s1;
		s1 = (void *)((char *)s1 + diff);
	}
	ASSERT(IS_P2ALIGNED(s1, SK_DUMP_BUF_ALIGN));
	s2 = (void *)((char *)s1 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s2, SK_DUMP_BUF_ALIGN));
	s3 = (void *)((char *)s2 + SK_COPY_LEN);
	ASSERT(IS_P2ALIGNED(s3, SK_DUMP_BUF_ALIGN));

	/* fill s1 with random data */
	read_frandom(s1, SK_COPY_LEN);

	kprintf("Skywalk: running self-tests\n");

	/* Copy 8-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8);
	bcopy(s1, s2, 8);
	sk_copy64_8((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_8);

	/* Copy 8-bytes, 32-bit aligned (offset by 4 to break 64-bit alignment) */
	SK_COPY_PREPARE(sk_copy32_8);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 8);
	sk_copy32_8((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_8);

	/* Copy 16-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_16);
	bcopy(s1, s2, 16);
	sk_copy64_16((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_16);

	/* Copy 16-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_16);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 16);
	sk_copy32_16((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_16);

	/* Copy 20-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_20);
	bcopy(s1, s2, 20);
	sk_copy64_20((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_20);

	/* Copy 24-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_24);
	bcopy(s1, s2, 24);
	sk_copy64_24((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_24);

	/* Copy 32-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32);
	bcopy(s1, s2, 32);
	sk_copy64_32((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_32);

	/* Copy 32-bytes, 32-bit aligned */
	SK_COPY_PREPARE(sk_copy32_32);
	bcopy((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s2 + sizeof(uint32_t)), 32);
	sk_copy32_32((void *)((char *)s1 + sizeof(uint32_t)),
	    (void *)((char *)s3 + sizeof(uint32_t)));
	SK_COPY_VERIFY(sk_copy32_32);

	/* Copy 40-bytes, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_40);
	bcopy(s1, s2, 40);
	sk_copy64_40((uint64_t *)s1, (uint64_t *)s3);
	SK_COPY_VERIFY(sk_copy64_40);

	/* Copy entire section in 64-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_64x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_64x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_64x);

	/* Copy entire section in 32-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_32x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_32x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_32x);

	/* Copy entire section in 8-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_8x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_8x((uint64_t *)s1, (uint64_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_8x);

	/* Copy entire section in 4-bytes chunks, 64-bit aligned */
	SK_COPY_PREPARE(sk_copy64_4x);
	bcopy(s1, s2, SK_COPY_LEN);
	sk_copy64_4x((uint32_t *)s1, (uint32_t *)s3, SK_COPY_LEN);
	SK_COPY_VERIFY(sk_copy64_4x);

	/*
	 * Re-use sk_dump_buf for testing sk_zero, same principle as above.
	 *
	 * 1st section is source buffer full of random data;
	 * 2nd section is reference target based on bzero;
	 * 3rd section is test target based on our stuff.
	 */
	SK_ZERO_PREPARE(sk_zero_16);
	bzero(s2, 16);
	sk_zero_16(s3);
	SK_ZERO_VERIFY(sk_zero_16);

	SK_ZERO_PREPARE(sk_zero_32);
	bzero(s2, 32);
	sk_zero_32(s3);
	SK_ZERO_VERIFY(sk_zero_32);

	SK_ZERO_PREPARE(sk_zero_48);
	bzero(s2, 48);
	sk_zero_48(s3);
	SK_ZERO_VERIFY(sk_zero_48);

	SK_ZERO_PREPARE(sk_zero_128);
	bzero(s2, 128);
	sk_zero_128(s3);
	SK_ZERO_VERIFY(sk_zero_128);

	/* Perform memcmp with mask self tests */
	skywalk_memcmp_mask_self_tests();

	/* reset sk_dump_buf */
	bzero(sk_dump_buf, SK_DUMP_BUF_SIZE);

	/* Keep packet trace code in sync with ariadne plist */
	static_assert(SK_KTRACE_AON_IF_STATS == 0x8100004);

	static_assert(SK_KTRACE_FSW_DEV_RING_FLUSH == 0x8110004);
	static_assert(SK_KTRACE_FSW_USER_RING_FLUSH == 0x8110008);
	static_assert(SK_KTRACE_FSW_FLOW_TRACK_RTT == 0x8110010);

	static_assert(SK_KTRACE_NETIF_RING_TX_REFILL == 0x8120004);
	static_assert(SK_KTRACE_NETIF_HOST_ENQUEUE == 0x8120008);
	static_assert(SK_KTRACE_NETIF_MIT_RX_INTR == 0x812000c);
	static_assert(SK_KTRACE_NETIF_COMMON_INTR == 0x8120010);
	static_assert(SK_KTRACE_NETIF_RX_NOTIFY_DEFAULT == 0x8120014);
	static_assert(SK_KTRACE_NETIF_RX_NOTIFY_FAST == 0x8120018);

	static_assert(SK_KTRACE_CHANNEL_TX_REFILL == 0x8130004);

	static_assert(SK_KTRACE_PKT_RX_DRV == 0x8140004);
	static_assert(SK_KTRACE_PKT_RX_FSW == 0x8140008);
	static_assert(SK_KTRACE_PKT_RX_CHN == 0x814000c);
	static_assert(SK_KTRACE_PKT_TX_FSW == 0x8140040);
	static_assert(SK_KTRACE_PKT_TX_AQM == 0x8140044);
	static_assert(SK_KTRACE_PKT_TX_DRV == 0x8140048);
}
1576 #endif /* DEVELOPMENT || DEBUG */
1577