/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * SKMEM_ARENA_TYPE_NEXUS:
 *
 *   This arena represents the memory subsystem of a nexus adapter.  It
 *   consists of a collection of memory regions that are usable by the
 *   nexus, as well as the various caches for objects in those regions.
 *
 *       (1 per nexus adapter)
 *     +=======================+
 *     |      skmem_arena      |
 *     +-----------------------+              (backing regions)
 *     |     ar_regions[0]     |           +=======================+
 *     :          ...          : ------->> |     skmem_region      |===+
 *     |     ar_regions[n]     |           +=======================+   |===+
 *     +=======================+               +=======================+   |
 *     |     arn_{caches,pp}   | ---+              +=======================+
 *     +-----------------------+    |
 *     |     arn_stats_obj     |    |
 *     |     arn_flowadv_obj   |    |         (cache frontends)
 *     |     arn_nexusadv_obj  |    |      +=======================+
 *     +-----------------------+    +--->> |     skmem_cache       |===+
 *                                         +=======================+   |===+
 *                                             +=======================+   |
 *                                                 +=======================+
 *
 *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
 *   may be external to the nexus adapter, e.g. created by the driver or an
 *   external entity.  If not supplied, we create these regions along with
 *   the packet buffer pool ourselves.  The rest of the regions (unrelated
 *   to the packet buffer pool) are unique to the arena and are allocated at
 *   arena creation time.
 *
 *   An arena may be mapped to a user task/process as many times as needed.
 *   The result of each mapping is a contiguous range within the address
 *   space of that task, indicated by the span [ami_mapaddr, ami_mapaddr +
 *   ami_mapsize).  This is achieved by leveraging the mapper memory object
 *   ar_mapper that "stitches" the disjoint segments together.  Only
 *   user-mappable regions, i.e. those marked with SKR_MODE_MMAPOK, will be
 *   included in this span.
 *
 *   Nexus adapters that are eligible for defunct will trigger the arena to
 *   undergo memory redirection for all regions except those that are marked
 *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
 *   to the adapter are defunct.  Upon completion, those redirected regions
 *   will be torn down in order to reduce their memory footprints.  When this
 *   happens, the adapter and its arena are no longer active or in service.
 *
 *   The arena exposes caches for allocating and freeing most region objects.
 *   These slab-allocator based caches act as front-ends to the regions; only
 *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
 *   other ones simply utilize skmem_cache for slab-based allocations.
 *
 *   Certain regions contain singleton objects that are simple enough to not
 *   require the slab allocator, such as the ones used for statistics and flow
 *   advisories.  Because of this, we directly allocate from those regions
 *   and store the objects in the arena.
 *
 * SKMEM_ARENA_TYPE_NECP:
 *
 *   This arena represents the memory subsystem of an NECP file descriptor
 *   object.  It consists of a memory region for per-flow statistics, as well
 *   as a cache front-end for that region.
 *
 * SKMEM_ARENA_TYPE_SYSTEM:
 *
 *   This arena represents general, system-wide objects.  It currently
 *   consists of the sysctls region that's created once at init time.
 */
/* END CSTYLED */
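/*
 * Illustrative lifecycle (a sketch, not part of the build; the caller
 * context, variable names and flag values below are assumptions, but the
 * functions and types are the ones defined in this file):
 *
 *	int err = 0;
 *	struct kern_pbufpool *tx_pp = NULL, *rx_pp = NULL;
 *	struct skmem_arena_mmap_info ami = { };
 *	struct skmem_arena *ar;
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    FALSE, FALSE, NULL, &err);		// create arena + pbufpool
 *	err = skmem_arena_mmap(ar, p, &ami);	// map SKR_MODE_MMAPOK regions
 *	...					// channel is active
 *	skmem_arena_munmap(ar, &ami);		// undo the task mapping
 *	skmem_arena_release(ar);		// drop the creation reference
 */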

#include <skywalk/os_skywalk_private.h>
#include <net/necp.h>

#include <kern/uipc_domain.h>

static void skmem_arena_destroy(struct skmem_arena *);
static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
static int skmem_arena_create_finalize(struct skmem_arena *);
static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
static void skmem_arena_init_common(struct skmem_arena *ar,
    skmem_arena_type_t type, size_t ar_zsize, const char *ar_str,
    const char *name);
static void skmem_arena_free(struct skmem_arena *);
static void skmem_arena_retain_locked(struct skmem_arena *);
static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
    struct skmem_arena_mmap_info *);
#if SK_LOG
static void skmem_arena_create_region_log(struct skmem_arena *);
#endif /* SK_LOG */
static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
    "Skywalk arena statistics");

static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);

static TAILQ_HEAD(, skmem_arena) skmem_arena_head =
    TAILQ_HEAD_INITIALIZER(skmem_arena_head);

#define SKMEM_ARENA_LOCK()                      \
	lck_mtx_lock(&skmem_arena_lock)
#define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKMEM_ARENA_UNLOCK()                    \
	lck_mtx_unlock(&skmem_arena_lock)

#define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
static SKMEM_TYPE_DEFINE(ar_nexus_zone, struct skmem_arena_nexus);

#define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
static SKMEM_TYPE_DEFINE(ar_necp_zone, struct skmem_arena_necp);

#define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
static SKMEM_TYPE_DEFINE(ar_system_zone, struct skmem_arena_system);

#define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
static SKMEM_TAG_DEFINE(skmem_tag_arena_mib, SKMEM_TAG_ARENA_MIB);

static_assert(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
static_assert(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
static_assert(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);

SK_NO_INLINE_ATTRIBUTE
static int
skmem_arena_sd_setup(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
    boolean_t kernel_only, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
	const char *__null_terminated name = NULL;
	char cname[64];
	skmem_region_id_t usd_type, ksd_type;
	int err = 0;
	usd_type = tx ? SKMEM_REGION_TXAUSD : SKMEM_REGION_RXFUSD;
	ksd_type = tx ? SKMEM_REGION_TXAKSD : SKMEM_REGION_RXFKSD;
	if (tx) {
		cachep = &arn->arn_txaksd_cache;
	} else {
		cachep = &arn->arn_rxfksd_cache;
	}
	name = __unsafe_null_terminated_from_indexable(na->na_name);
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	if (ksd_skr == NULL) {
		SK_ERR("\"%s\" ar 0x%p flags 0x%x failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags,
		    srp[ksd_type].srp_name);
		err = ENOMEM;
		goto failed;
	}
	ar->ar_regions[ksd_type] = ksd_skr;
	if (!kernel_only) {
		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
		    NULL, NULL);
		if (usd_skr == NULL) {
			err = ENOMEM;
			goto failed;
		}
		ar->ar_regions[usd_type] = usd_skr;
		skmem_region_mirror(ksd_skr, usd_skr);
	}
	name = tsnprintf(cname, sizeof(cname), "%s_ksd.%.*s",
	    tx ? "txa" : "rxf", (int)sizeof(na->na_name), na->na_name);
	ASSERT(ar->ar_regions[ksd_type] != NULL);
	*cachep = skmem_cache_create(name, srp[ksd_type].srp_c_obj_size, 0,
	    NULL, NULL, NULL, NULL, ar->ar_regions[ksd_type],
	    SKMEM_CR_NOMAGAZINES);
	if (*cachep == NULL) {
		SK_ERR("\"%s\" ar %p flags 0x%x failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, cname);
		err = ENOMEM;
		goto failed;
	}
	return 0;

failed:
	if (ksd_skr != NULL) {
		skmem_region_release(ksd_skr);
		ar->ar_regions[ksd_type] = NULL;
	}
	if (usd_skr != NULL) {
		/*
		 * Decrements the refcnt incremented by skmem_region_mirror();
		 * this is not needed in case skmem_cache_create() succeeds,
		 * because skmem_cache_destroy() does the release.
		 */
		skmem_region_release(usd_skr);

		/* decrements the region's own refcnt */
		skmem_region_release(usd_skr);
		ar->ar_regions[usd_type] = NULL;
	}
	return err;
}

SK_NO_INLINE_ATTRIBUTE
static void
skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region **ksd_rp, **usd_rp;

	if (tx) {
		cachep = &arn->arn_txaksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
	} else {
		cachep = &arn->arn_rxfksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
	}
	if (*cachep != NULL) {
		skmem_cache_destroy(*cachep);
		*cachep = NULL;
	}
	if (*usd_rp != NULL) {
		skmem_region_release(*usd_rp);
		*usd_rp = NULL;
	}
	if (*ksd_rp != NULL) {
		skmem_region_release(*ksd_rp);
		*ksd_rp = NULL;
	}
}

static bool
skmem_arena_pp_setup(struct skmem_arena *ar,
    struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
    struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
    boolean_t kernel_only, boolean_t pp_truncated_buf)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;

	if (rx_pp == NULL && tx_pp == NULL) {
		uint32_t ppcreatef = 0;
		if (pp_truncated_buf) {
			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
		}
		if (kernel_only) {
			ppcreatef |= PPCREATEF_KERNEL_ONLY;
		}
		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
		}
		/* callee retains pp upon success */
		rx_pp = pp_create(name, srp, NULL, NULL, NULL, NULL, NULL,
		    ppcreatef);
		if (rx_pp == NULL) {
			SK_ERR("\"%s\" ar %p flags 0x%x failed to create pp",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags);
			return false;
		}
		pp_retain(rx_pp);
		tx_pp = rx_pp;
	} else {
		if (rx_pp == NULL) {
			rx_pp = tx_pp;
		} else if (tx_pp == NULL) {
			tx_pp = rx_pp;
		}

		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
		ASSERT(!(!kernel_only &&
		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
		pp_retain(rx_pp);
		pp_retain(tx_pp);
	}

	arn->arn_rx_pp = rx_pp;
	arn->arn_tx_pp = tx_pp;
	if (rx_pp == tx_pp) {
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_KBFT] =
			    rx_pp->pp_kbft_region;
		}
		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
	} else {
		ASSERT(kernel_only); /* split userspace pools not supported */
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] = NULL;
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] =
		    PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		skmem_region_retain(PP_BUF_REGION_DEF(tx_pp));
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] =
		    PP_BUF_REGION_DEF(tx_pp);
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(tx_pp);
		if (PP_BUF_REGION_LARGE(tx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(tx_pp));
		}
		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
		skmem_region_retain(tx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_RXKBFT] =
			    rx_pp->pp_kbft_region;
		}
		if (tx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
			skmem_region_retain(tx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_TXKBFT] =
			    tx_pp->pp_kbft_region;
		}
	}

	if (kernel_only) {
		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
			ASSERT(PP_KERNEL_ONLY(rx_pp));
			ASSERT(PP_KERNEL_ONLY(tx_pp));
			ASSERT(rx_pp->pp_umd_region == NULL);
			ASSERT(tx_pp->pp_umd_region == NULL);
			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(rx_pp->pp_ubft_region == NULL);
			ASSERT(tx_pp->pp_ubft_region == NULL);
			if (rx_pp->pp_kbft_region != NULL) {
				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
			if (tx_pp->pp_kbft_region != NULL) {
				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
		}
	} else {
		ASSERT(rx_pp == tx_pp);
		ASSERT(!PP_KERNEL_ONLY(rx_pp));
		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
		skmem_region_retain(rx_pp->pp_umd_region);
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
			ASSERT(rx_pp->pp_ubft_region != NULL);
			ASSERT(rx_pp->pp_ubft_region->skr_mode &
			    SKR_MODE_MIRRORED);
			ar->ar_regions[SKMEM_REGION_UBFT] =
			    rx_pp->pp_ubft_region;
			skmem_region_retain(rx_pp->pp_ubft_region);
		}
	}

	arn->arn_md_type = rx_pp->pp_md_type;
	arn->arn_md_subtype = rx_pp->pp_md_subtype;
	return true;
}

static void
skmem_arena_init_common(struct skmem_arena *ar, skmem_arena_type_t type,
    size_t ar_zsize, const char *ar_str, const char *name)
{
	ar->ar_type = type;
	ar->ar_zsize = ar_zsize;
	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
	    LCK_ATTR_NULL);
	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);
}

/*
 * Create a nexus adapter arena.
 */
struct skmem_arena *
skmem_arena_create_for_nexus(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
    struct kern_pbufpool **rx_pp, boolean_t pp_truncated_buf,
    boolean_t kernel_only, struct kern_nexus_advisory *nxv, int *perr)
{
#define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
	struct skmem_arena *ar;
	struct skmem_arena_nexus *__single arn;
	char cname[64];
	uint32_t i;
	const char *__null_terminated name =
	    __unsafe_null_terminated_from_indexable(na->na_name);
	uint32_t msize = 0;
	void *__sized_by(msize) maddr = NULL;

	*perr = 0;

	arn = zalloc_flags(ar_nexus_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar = &arn->arn_cmn;
	skmem_arena_init_common(ar, SKMEM_ARENA_TYPE_NEXUS, AR_NEXUS_SIZE,
	    "nexus", name);

	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);

	/* these regions must not be readable/writeable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);

	/* these regions must be read-only */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY);
		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY);
	} else {
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY));
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY));
	}

	/* these regions must be user-mappable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);

	/* these must not be user-mappable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));

	/* these regions must be shareable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	/* these regions must not be shareable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));

	/* these must stay active */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);

	/* no kstats for nexus */
	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);

	/* these regions have memtag enabled */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MEMTAG);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MEMTAG);

	AR_LOCK(ar);
	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
	    (tx_pp ? *tx_pp : NULL), kernel_only, pp_truncated_buf)) {
		goto failed;
	}

	if (nxv != NULL && nxv->nxv_reg != NULL) {
		struct skmem_region *skr = nxv->nxv_reg;

		ASSERT(skr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(skr->skr_seg_max_cnt == 1);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = skr;
		skmem_region_retain(skr);

		ASSERT(nxv->nxv_adv != NULL);
		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
			VERIFY(nxv->netif_nxv_adv->nna_version ==
			    NX_NETIF_ADVISORY_CURRENT_VERSION);
		} else {
			panic_plain("%s: invalid advisory type %d",
			    __func__, nxv->nxv_adv_type);
			/* NOTREACHED */
		}
		arn->arn_nexusadv_obj = nxv->nxv_adv;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
		goto failed;
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
		goto failed;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		/* skip if already created */
		if (ar->ar_regions[i] != NULL) {
			continue;
		}

		/* skip external regions from packet pool */
		if (skmem_region_for_pp(i)) {
			continue;
		}

		/* skip slot descriptor regions */
		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
			continue;
		}

		/* skip if region is configured to be empty */
		if (srp[i].srp_c_obj_cnt == 0) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		ASSERT(srp[i].srp_id == i);

		/*
		 * Skip {SCHEMA, RING, GUARD} for kernel-only arena.  Note
		 * that this is assuming kernel-only arena is always used
		 * for kernel-only nexus adapters (never used directly by
		 * user process.)
		 *
		 * XXX [email protected] - see comments in kern_pbufpool_create().
		 * We need to revisit this logic for "direct channel" access,
		 * perhaps via a separate adapter flag.
		 */
		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
		    i == SKMEM_REGION_GUARD_TAIL)) {
			continue;
		}

		/* not for nexus, or for us to create here */
		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
		ASSERT(i != SKMEM_REGION_NEXUSADV);
		ASSERT(i != SKMEM_REGION_SYSCTLS);
		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
		ASSERT(i != SKMEM_REGION_KSTATS);
		ASSERT(i != SKMEM_REGION_INTRINSIC);

		/* otherwise create it */
		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" ar %p flags 0x%x failed to "
			    "create %s region", ar->ar_name, SK_KVA(ar),
			    ar->ar_flags, srp[i].srp_name);
			goto failed;
		}
	}

	/* create skmem_cache for schema (without magazines) */
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
		name = tsnprintf(cname, sizeof(cname), "schema.%.*s",
		    (int)sizeof(na->na_name), na->na_name);
		if ((arn->arn_schema_cache = skmem_cache_create(name,
		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL,
		    NULL, NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
		    SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" ar %p flags 0x%x failed to create %s",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, cname);
			goto failed;
		}
	}

	/* create skmem_cache for rings (without magazines) */
	name = tsnprintf(cname, sizeof(cname), "ring.%.*s",
	    (int)sizeof(na->na_name), na->na_name);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
	    (arn->arn_ring_cache = skmem_cache_create(name,
	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL,
	    NULL, ar->ar_regions[SKMEM_REGION_RING],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar %p flags 0x%x failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, cname);
		goto failed;
	}

	/*
	 * If the stats region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
		struct skmem_region *skr = ar->ar_regions[SKMEM_REGION_USTATS];
		void *obj;

		/* no kstats for nexus */
		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
		ASSERT(skr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(skr->skr_seg_max_cnt == 1);

		if ((obj = skmem_region_alloc(skr, &maddr,
		    NULL, NULL, SKMEM_SLEEP, skr->skr_c_obj_size, &msize)) == NULL) {
			SK_ERR("\"%s\" ar %p flags 0x%x failed to alloc stats",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags);
			goto failed;
		}
		arn->arn_stats_obj = obj;
		arn->arn_stats_obj_size = skr->skr_c_obj_size;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);

	/*
	 * If the flowadv region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
		struct skmem_region *skr =
		    ar->ar_regions[SKMEM_REGION_FLOWADV];
		void *obj;

		ASSERT(skr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(skr->skr_seg_max_cnt == 1);

		if ((obj = skmem_region_alloc(skr, &maddr,
		    NULL, NULL, SKMEM_SLEEP, skr->skr_c_obj_size, &msize)) == NULL) {
			SK_ERR("\"%s\" ar %p flags 0x%x failed to alloc "
			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags);
			goto failed;
		}
		/* XXX -fbounds-safety: should get the count elsewhere */
		arn->arn_flowadv_obj = obj;
		arn->arn_flowadv_entries = sk_max_flows;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar %p flags 0x%x failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags);
		goto failed;
	}

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

	/* caller didn't give us one, but would like us to return it? */
	if (rx_pp != NULL && *rx_pp == NULL) {
		*rx_pp = arn->arn_rx_pp;
		pp_retain(*rx_pp);
	}
	if (tx_pp != NULL && *tx_pp == NULL) {
		*tx_pp = arn->arn_tx_pp;
		pp_retain(*tx_pp);  /* for caller */
	}

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
#undef SRP_CFLAGS
}

void
skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
{
	struct skmem_arena *ar = &arn->arn_cmn;

	AR_LOCK(ar);
	arn->arn_ksd_nodefunct += cnt;
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	AR_UNLOCK(ar);
}

boolean_t
skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	boolean_t idle;

	AR_LOCK(ar);
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	idle = (arn->arn_ksd_nodefunct == 0);
	AR_UNLOCK(ar);

	return idle;
}
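
/*
 * Illustrative pairing (a sketch; the actual callers are the nexus
 * owners outside this file).  The count is a signed delta, so a negative
 * value undoes an earlier positive one:
 *
 *	skmem_arena_nexus_sd_set_noidle(arn, 1);	// pin KSD regions
 *	...						// touch slot descriptors
 *	skmem_arena_nexus_sd_set_noidle(arn, -1);	// unpin
 *
 * skmem_arena_nexus_teardown() below only tears the slot descriptor
 * regions down once skmem_arena_nexus_sd_idle(arn) is TRUE.
 */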

static void
skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	struct skmem_region *skr;
	int i;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* these should never be set for nexus arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	if (arn->arn_stats_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_stats_obj, NULL);
		arn->arn_stats_obj_size = 0;
		arn->arn_stats_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(arn->arn_stats_obj == NULL);

	if (arn->arn_flowadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));

		/* XXX -fbounds-safety */
		void *obj = __unsafe_forge_bidi_indexable(void *,
		    arn->arn_flowadv_obj, skr->skr_c_obj_size);
		skmem_region_free(skr, obj, NULL);
		arn->arn_flowadv_obj = NULL;
		arn->arn_flowadv_entries = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(arn->arn_flowadv_obj == NULL);

	if (arn->arn_nexusadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so just nullify it */
		arn->arn_nexusadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(arn->arn_nexusadv_obj == NULL);

	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
	if (arn->arn_rx_pp != NULL) {
		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
			skmem_region_id_t reg = skmem_pp_region_ids[i];
			skr = ar->ar_regions[reg];
			if (skr != NULL) {
				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
				skmem_region_release(skr);
				ar->ar_regions[reg] = NULL;
			}
		}
		pp_release(arn->arn_rx_pp);
		pp_release(arn->arn_tx_pp);
		arn->arn_rx_pp = NULL;
		arn->arn_tx_pp = NULL;
	}
	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}
	ASSERT(arn->arn_rx_pp == NULL);
	ASSERT(arn->arn_tx_pp == NULL);

	if (arn->arn_ring_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_RING];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_ring_cache);
		arn->arn_ring_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_RING] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(arn->arn_ring_cache == NULL);

	/*
	 * Stop here if we're in the defunct context, and we're asked
	 * to keep the slot descriptor regions alive as they are still
	 * being referred to by the nexus owner (driver).
	 */
	if (defunct && arn->arn_ksd_nodefunct != 0) {
		ASSERT(arn->arn_ksd_nodefunct > 0);
		return;
	}

	ASSERT(arn->arn_ksd_nodefunct == 0);
	skmem_arena_sd_teardown(ar, TRUE);
	skmem_arena_sd_teardown(ar, FALSE);

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}
	if (arn->arn_schema_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_schema_cache);
		arn->arn_schema_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(arn->arn_schema_cache == NULL);

	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
}

/*
 * Create an NECP arena.
 */
struct skmem_arena *
skmem_arena_create_for_necp(const char *name,
    struct skmem_region_params *srp_ustats,
    struct skmem_region_params *srp_kstats, int *perr)
{
	struct skmem_arena_necp *__single arc;
	struct skmem_arena *ar;
	char cname[64];
	const char *__null_terminated cache_name = NULL;

	*perr = 0;

	arc = zalloc_flags(ar_necp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar = &arc->arc_cmn;
	skmem_arena_init_common(ar, SKMEM_ARENA_TYPE_NECP, AR_NECP_SIZE,
	    "necp", name);

	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);

	/*
	 * Must be stats region, and must be user-mappable;
	 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
	 * as the client might want multi-segment mode.
	 */
	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(srp_ustats->srp_c_obj_size != 0);
	ASSERT(srp_kstats->srp_c_obj_size != 0);
	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);

	AR_LOCK(ar);

	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
	    srp_ustats, NULL, NULL, NULL)) == NULL) {
		goto failed;
	}

	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
	    srp_kstats, NULL, NULL, NULL)) == NULL) {
		goto failed;
	}

	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
	    ar->ar_regions[SKMEM_REGION_USTATS]);

	/* create skmem_cache for kernel stats (without magazines) */
	cache_name = tsnprintf(cname, sizeof(cname), "kstats.%s", name);
	if ((arc->arc_kstats_cache = skmem_cache_create(cache_name,
	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL,
	    NULL, ar->ar_regions[SKMEM_REGION_KSTATS],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		goto failed;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		goto failed;
	}

	/*
	 * These must never be configured for NECP arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	SK_ERR("\"%s\" ar %p flags 0x%x failed to create %s region",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags, srp_kstats->srp_name);
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
{
#pragma unused(defunct)
	struct skmem_arena *ar = &arc->arc_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);

	/* these must never be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	if (arc->arc_kstats_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arc->arc_kstats_cache);
		arc->arc_kstats_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;

		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(arc->arc_kstats_cache == NULL);
}

/*
 * Given an arena, return its NECP variant (if applicable).
 */
struct skmem_arena_necp *
skmem_arena_necp(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
		return NULL;
	}

	return (struct skmem_arena_necp *)ar;
}

/*
 * Create a System arena.
 */
struct skmem_arena *
skmem_arena_create_for_system(const char *name, int *perr)
{
	struct skmem_region *skrsys;
	struct skmem_arena_system *ars;
	struct skmem_arena *ar;

	*perr = 0;

	ars = zalloc_flags(ar_system_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar = &ars->ars_cmn;
	skmem_arena_init_common(ar, SKMEM_ARENA_TYPE_SYSTEM, AR_SYSTEM_SIZE,
	    "system", name);

	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);

	AR_LOCK(ar);
	/* retain system-wide sysctls region */
	skrsys = skmem_get_sysctls_region();
	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
	    SKR_MODE_SHAREOK)) ==
	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
	    SKR_MODE_MONOLITHIC));
	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
	skmem_region_retain(skrsys);

	/* object is valid as long as the sysctls region is retained */
	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
	ASSERT(ars->ars_sysctls_obj != NULL);
	ASSERT(ars->ars_sysctls_objsize != 0);

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar %p flags 0x%x failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags);
		goto failed;
	}

	/*
	 * These must never be configured for system arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
{
	struct skmem_arena *ar = &ars->ars_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);

	/* these must never be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* nothing to do here for now during defunct, just return */
	if (defunct) {
		return;
	}

	if (ars->ars_sysctls_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so don't free it */
		ars->ars_sysctls_obj = NULL;
		ars->ars_sysctls_objsize = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ars->ars_sysctls_obj == NULL);
	ASSERT(ars->ars_sysctls_objsize == 0);
}

/*
 * Given an arena, return its System variant (if applicable).
 */
struct skmem_arena_system *
skmem_arena_system(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
		return NULL;
	}

	return (struct skmem_arena_system *)ar;
}

void *
skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_obj;
}

size_t
skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_objsize;
}

/*
 * Destroy an arena.
 */
static void
skmem_arena_destroy(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar %p flags 0x%x",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags);

	ASSERT(ar->ar_refcnt == 0);
	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
		AR_UNLOCK(ar);
		SKMEM_ARENA_LOCK();
		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
		SKMEM_ARENA_UNLOCK();
		AR_LOCK(ar);
		ASSERT(ar->ar_refcnt == 0);
	}

	/* teardown all remaining memory regions and associated resources */
	skmem_arena_teardown(ar, FALSE);

	if (ar->ar_ar != NULL) {
		IOSKArenaDestroy(ar->ar_ar);
		ar->ar_ar = NULL;
	}

	if (ar->ar_flags & ARF_ACTIVE) {
		ar->ar_flags &= ~ARF_ACTIVE;
	}

	AR_UNLOCK(ar);

	skmem_arena_free(ar);
}

/*
 * Teardown (or defunct) an arena.
 */
static void
skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
{
	uint32_t i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
		    defunct);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	/* take care of any remaining ones */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		skmem_region_release(ar->ar_regions[i]);
		ar->ar_regions[i] = NULL;
	}
}

static int
skmem_arena_create_finalize(struct skmem_arena *ar)
{
	IOSKRegionRef reg[SKMEM_REGIONS];
	uint32_t i, regcnt = 0;
	int err = 0;

	AR_LOCK_ASSERT_HELD(ar);

	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	/*
	 * Prepare an array of regions that can be mapped to user task;
	 * exclude regions that aren't eligible for user task mapping.
	 */
	bzero(&reg, sizeof(reg));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
			continue;
		}

		ASSERT(skr->skr_reg != NULL);
		reg[regcnt++] = skr->skr_reg;
	}
	ASSERT(regcnt != 0);

	/*
	 * Create backing IOSKArena handle.
	 */
	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
	if (ar->ar_ar == NULL) {
		SK_ERR("\"%s\" ar %p flags 0x%x failed to create IOSKArena of "
		    "%u regions", ar->ar_name, SK_KVA(ar), ar->ar_flags, regcnt);
		err = ENOMEM;
		goto failed;
	}

	ar->ar_flags |= ARF_ACTIVE;

failed:
	return err;
}

static void
skmem_arena_free(struct skmem_arena *ar)
{
#if DEBUG || DEVELOPMENT
	ASSERT(ar->ar_refcnt == 0);
	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
	ASSERT(ar->ar_ar == NULL);
	ASSERT(ar->ar_mapcnt == 0);
	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
		ASSERT(ar->ar_regions[i] == NULL);
	}
#endif /* DEBUG || DEVELOPMENT */

	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		zfree(ar_nexus_zone, ar);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		zfree(ar_necp_zone, ar);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		zfree(ar_system_zone, ar);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

/*
 * Retain an arena.
 */
__attribute__((always_inline))
static inline void
skmem_arena_retain_locked(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);
	ar->ar_refcnt++;
	ASSERT(ar->ar_refcnt != 0);
}

void
skmem_arena_retain(struct skmem_arena *ar)
{
	AR_LOCK(ar);
	skmem_arena_retain_locked(ar);
	AR_UNLOCK(ar);
}

/*
 * Release (and potentially destroy) an arena.
 */
__attribute__((always_inline))
static inline boolean_t
1463 skmem_arena_release_locked(struct skmem_arena *ar)
1464 {
1465 	boolean_t lastref = FALSE;
1466 
1467 	AR_LOCK_ASSERT_HELD(ar);
1468 	ASSERT(ar->ar_refcnt != 0);
1469 	if (--ar->ar_refcnt == 0) {
1470 		skmem_arena_destroy(ar);
1471 		lastref = TRUE;
1472 	} else {
1473 		lastref = FALSE;
1474 	}
1475 
1476 	return lastref;
1477 }
1478 
1479 boolean_t
1480 skmem_arena_release(struct skmem_arena *ar)
1481 {
1482 	boolean_t lastref;
1483 
1484 	AR_LOCK(ar);
1485 	/* unlock only if this isn't the last reference */
1486 	if (!(lastref = skmem_arena_release_locked(ar))) {
1487 		AR_UNLOCK(ar);
1488 	}
1489 
1490 	return lastref;
1491 }
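
/*
 * Usage sketch (illustrative, not from the original source): callers
 * pair retain/release on the arena:
 *
 *	skmem_arena_retain(ar);
 *	... use ar ...
 *	if (skmem_arena_release(ar)) {
 *		ar = NULL;
 *	}
 *
 * A TRUE return means the last reference was just dropped and the
 * arena has been destroyed; the caller must neither unlock nor touch
 * it afterwards.
 */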
1492 
1493 /*
1494  * Map an arena to the task's address space.
1495  */
1496 int
1497 skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1498     struct skmem_arena_mmap_info *ami)
1499 {
1500 	struct task *__single task = proc_task(p);
1501 	IOReturn ioerr;
1502 	int err = 0;
1503 
1504 	ASSERT(task != kernel_task && task != TASK_NULL);
1505 	ASSERT(ami->ami_arena == NULL);
1506 	ASSERT(ami->ami_mapref == NULL);
1507 	ASSERT(ami->ami_maptask == TASK_NULL);
1508 	ASSERT(!ami->ami_redirect);
1509 
1510 	AR_LOCK(ar);
1511 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1512 		err = ENODEV;
1513 		goto failed;
1514 	}
1515 
1516 	ASSERT(ar->ar_ar != NULL);
1517 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1518 		err = ENOMEM;
1519 		goto failed;
1520 	}
1521 
1522 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1523 	    &ami->ami_mapsize);
1524 	VERIFY(ioerr == kIOReturnSuccess);
1525 
1526 	ami->ami_arena = ar;
1527 	skmem_arena_retain_locked(ar);
1528 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1529 
1530 	ami->ami_maptask = task;
1531 	ar->ar_mapcnt++;
1532 	if (ar->ar_mapcnt == 1) {
1533 		ar->ar_mapsize = ami->ami_mapsize;
1534 	}
1535 
1536 	ASSERT(ami->ami_mapref != NULL);
1537 	ASSERT(ami->ami_arena == ar);
1538 	AR_UNLOCK(ar);
1539 
1540 	return 0;
1541 
1542 failed:
1543 	AR_UNLOCK(ar);
1544 	skmem_arena_munmap(ar, ami);
1545 	VERIFY(err != 0);
1546 
1547 	return err;
1548 }
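
/*
 * Usage sketch (illustrative; the surrounding error handling is an
 * assumption): a hypothetical caller maps the arena into the current
 * process and later unmaps it, with ami embedded in its own state:
 *
 *	struct skmem_arena_mmap_info ami;
 *	bzero(&ami, sizeof(ami));
 *	if ((err = skmem_arena_mmap(ar, p, &ami)) != 0) {
 *		return err;
 *	}
 *	... ami.ami_mapaddr and ami.ami_mapsize describe the span ...
 *	skmem_arena_munmap(ar, &ami);
 *
 * Note that the failure path of skmem_arena_mmap() itself calls
 * skmem_arena_munmap() to undo any partially-constructed state.
 */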
1549 
1550 /*
1551  * Remove arena's memory mapping from task's address space (common code).
1552  * Returns true if caller needs to perform a deferred defunct.
1553  */
1554 static boolean_t
1555 skmem_arena_munmap_common(struct skmem_arena *ar,
1556     struct skmem_arena_mmap_info *ami)
1557 {
1558 	boolean_t need_defunct = FALSE;
1559 
1560 	AR_LOCK(ar);
1561 	if (ami->ami_mapref != NULL) {
1562 		IOSKMapperDestroy(ami->ami_mapref);
1563 		ami->ami_mapref = NULL;
1564 
1565 		VERIFY(ar->ar_mapcnt != 0);
1566 		ar->ar_mapcnt--;
1567 		if (ar->ar_mapcnt == 0) {
1568 			ar->ar_mapsize = 0;
1569 		}
1570 
1571 		VERIFY(ami->ami_arena == ar);
1572 		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1573 		    ami_link);
1574 
1575 		/*
1576 		 * We expect that the caller ensures an extra reference
1577 		 * held on the arena, in addition to the one in mmap_info.
1578 		 */
1579 		VERIFY(ar->ar_refcnt > 1);
1580 		(void) skmem_arena_release_locked(ar);
1581 		ami->ami_arena = NULL;
1582 
1583 		if (ami->ami_redirect) {
1584 			/*
1585 			 * This mapper has been redirected; decrement
1586 			 * the redirect count associated with it.
1587 			 */
1588 			VERIFY(ar->ar_maprdrcnt != 0);
1589 			ar->ar_maprdrcnt--;
1590 		} else if (ar->ar_maprdrcnt != 0 &&
1591 		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
1592 			/*
1593 			 * There are other mappers for this arena that have
1594 			 * all been redirected, but the arena wasn't marked
1595 			 * inactive by skmem_arena_redirect() last time since
1596 			 * this particular mapper that we just destroyed
1597 			 * was using it.  Now that it's gone, finish the
1598 			 * postponed work below once we return to caller.
1599 			 * postponed work below once we return to the caller.
1600 			ASSERT(ar->ar_flags & ARF_ACTIVE);
1601 			ar->ar_flags &= ~ARF_ACTIVE;
1602 			need_defunct = TRUE;
1603 		}
1604 	}
1605 	ASSERT(ami->ami_mapref == NULL);
1606 	ASSERT(ami->ami_arena == NULL);
1607 
1608 	ami->ami_maptask = TASK_NULL;
1609 	ami->ami_mapaddr = 0;
1610 	ami->ami_mapsize = 0;
1611 	ami->ami_redirect = FALSE;
1612 
1613 	AR_UNLOCK(ar);
1614 
1615 	return need_defunct;
1616 }
1617 
1618 /*
1619  * Remove arena's memory mapping from task's address space (channel version).
1620  * Will perform a deferred defunct if needed.
1621  */
1622 void
1623 skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1624 {
1625 	SK_LOCK_ASSERT_HELD();
1626 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1627 
1628 	/*
1629 	 * If this channel was holding the last active reference
1630 	 * count on the arena, and there are other defunct
1631 	 * channels pointing to that arena, perform the
1632 	 * actual arena defunct now.
1633 	 */
1634 	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1635 		struct kern_nexus *nx = ch->ch_nexus;
1636 		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1637 
1638 		/*
1639 		 * Similar to kern_channel_defunct(), where we let the
1640 		 * domain provider complete the defunct.  At this point
1641 		 * both sk_lock and the channel locks are held, and so
1642 		 * we indicate that to the callee.
1643 		 */
1644 		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1645 		    nx, ch, TRUE);
1646 	}
1647 }
1648 
1649 /*
1650  * Remove arena's memory mapping from task's address space (generic).
1651  * This routine should only be called on non-channel related arenas.
1652  */
1653 void
1654 skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
1655 {
1656 	(void) skmem_arena_munmap_common(ar, ami);
1657 }
1658 
1659 /*
1660  * Redirect eligible memory regions in the task's memory map so that
1661  * they get overwritten and backed with anonymous (zero-filled) pages.
1662  */
1663 int
1664 skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
1665     struct proc *p, boolean_t *need_defunct)
1666 {
1667 #pragma unused(p)
1668 	int err = 0;
1669 
1670 	*need_defunct = FALSE;
1671 
1672 	AR_LOCK(ar);
1673 	ASSERT(ar->ar_ar != NULL);
1674 	if (ami->ami_redirect) {
1675 		err = EALREADY;
1676 	} else if (ami->ami_mapref == NULL) {
1677 		err = ENXIO;
1678 	} else {
1679 		VERIFY(ar->ar_mapcnt != 0);
1680 		ASSERT(ar->ar_flags & ARF_ACTIVE);
1681 		VERIFY(ami->ami_arena == ar);
1682 		/*
1683 		 * This effectively overwrites the mappings for all
1684 		 * redirectable memory regions (i.e. those without the
1685 		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
1686 		 * protection flags.  Accesses to these regions will be
1687 		 * redirected to anonymous, zero-filled pages.
1688 		 */
1689 		IOSKMapperRedirect(ami->ami_mapref);
1690 		ami->ami_redirect = TRUE;
1691 
1692 		/*
1693 		 * Mark the arena as inactive if all mapper instances are
1694 		 * redirected; otherwise, we do this later during unmap.
1695 		 * Once inactive, the arena will not allow further mmap,
1696 		 * and it is ready to be defunct later.
1697 		 */
1698 		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
1699 			ar->ar_flags &= ~ARF_ACTIVE;
1700 			*need_defunct = TRUE;
1701 		}
1702 	}
1703 	AR_UNLOCK(ar);
1704 
1705 	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
1706 	    "%s(%d) \"%s\" ar %p flags 0x%x inactive %u need_defunct %u "
1707 	    "err %d", sk_proc_name(p), sk_proc_pid(p), ar->ar_name,
1708 	    SK_KVA(ar), ar->ar_flags, !(ar->ar_flags & ARF_ACTIVE),
1709 	    *need_defunct, err);
1710 
1711 	return err;
1712 }
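
/*
 * Flow sketch (illustrative): a defunct path would typically redirect
 * each mapper and, once the last active one is gone, complete the
 * arena defunct; everything here other than the two functions named
 * is an assumed caller:
 *
 *	boolean_t need_defunct = FALSE;
 *	err = skmem_arena_mredirect(ar, ami, p, &need_defunct);
 *	if (err == 0 && need_defunct) {
 *		... arena is now inactive, so this will not return EBUSY ...
 *		(void) skmem_arena_defunct(ar);
 *	}
 */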
1713 
1714 /*
1715  * Defunct an arena.
1716  */
1717 int
1718 skmem_arena_defunct(struct skmem_arena *ar)
1719 {
1720 	AR_LOCK(ar);
1721 
1722 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar %p flags 0x%x", ar->ar_name,
1723 	    SK_KVA(ar), ar->ar_flags);
1724 
1725 	if (ar->ar_flags & ARF_DEFUNCT) {
1726 		AR_UNLOCK(ar);
1727 		return EALREADY;
1728 	} else if (ar->ar_flags & ARF_ACTIVE) {
1729 		AR_UNLOCK(ar);
1730 		return EBUSY;
1731 	}
1732 
1733 	/* purge the caches now */
1734 	skmem_arena_reap_locked(ar, TRUE);
1735 
1736 	/* teardown eligible memory regions and associated resources */
1737 	skmem_arena_teardown(ar, TRUE);
1738 
1739 	ar->ar_flags |= ARF_DEFUNCT;
1740 
1741 	AR_UNLOCK(ar);
1742 
1743 	return 0;
1744 }
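
/*
 * Return-value sketch (illustrative): EBUSY means the arena is still
 * ARF_ACTIVE, i.e. not every mapper has been redirected or unmapped
 * yet, so the caller is expected to retry once ARF_ACTIVE clears:
 *
 *	int err = skmem_arena_defunct(ar);
 *	if (err == EBUSY) {
 *		... still active: redirect or unmap mappers first ...
 *	} else {
 *		ASSERT(err == 0 || err == EALREADY);
 *	}
 */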
1745 
1746 /*
1747  * Retrieve total and in-use memory statistics of regions in the arena.
1748  */
1749 void
1750 skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
1751     uint64_t *mem_inuse)
1752 {
1753 	uint32_t i;
1754 
1755 	if (mem_total != NULL) {
1756 		*mem_total = 0;
1757 	}
1758 	if (mem_inuse != NULL) {
1759 		*mem_inuse = 0;
1760 	}
1761 
1762 	AR_LOCK(ar);
1763 	for (i = 0; i < SKMEM_REGIONS; i++) {
1764 		if (ar->ar_regions[i] == NULL) {
1765 			continue;
1766 		}
1767 
1768 		if (mem_total != NULL) {
1769 			*mem_total += AR_MEM_TOTAL(ar, i);
1770 		}
1771 		if (mem_inuse != NULL) {
1772 			*mem_inuse += AR_MEM_INUSE(ar, i);
1773 		}
1774 	}
1775 	AR_UNLOCK(ar);
1776 }
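
/*
 * Usage sketch (illustrative): either pointer may be NULL if the
 * caller wants only one of the two counters:
 *
 *	uint64_t total = 0, inuse = 0;
 *	skmem_arena_get_stats(ar, &total, &inuse);
 *	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" %llu of %llu bytes in use",
 *	    ar->ar_name, inuse, total);
 */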
1777 
1778 /*
1779  * Retrieve the offset of a particular region (identified by its ID)
1780  * from the base of the arena.
1781  */
1782 mach_vm_offset_t
1783 skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
1784 {
1785 	mach_vm_offset_t offset = 0;
1786 	uint32_t i;
1787 
1788 	ASSERT(id < SKMEM_REGIONS);
1789 
1790 	AR_LOCK(ar);
1791 	for (i = 0; i < id; i++) {
1792 		if (ar->ar_regions[i] == NULL) {
1793 			continue;
1794 		}
1795 
1796 		offset += ar->ar_regions[i]->skr_size;
1797 	}
1798 	AR_UNLOCK(ar);
1799 
1800 	return offset;
1801 }
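
/*
 * Worked example (hypothetical sizes): with region 0 of size 0x4000,
 * region 1 absent (NULL), and region 2 of size 0x8000, the offset of
 * region id 3 is 0x4000 + 0x8000 = 0xc000 from the arena base, since
 * NULL slots contribute nothing to the running sum.
 */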
1802 
1803 static void
1804 skmem_reap_pbufpool_caches(struct kern_pbufpool *pp, boolean_t purge)
1805 {
1806 	if (pp->pp_kmd_cache != NULL) {
1807 		skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1808 	}
1809 	if (PP_BUF_CACHE_DEF(pp) != NULL) {
1810 		skmem_cache_reap_now(PP_BUF_CACHE_DEF(pp), purge);
1811 	}
1812 	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1813 		skmem_cache_reap_now(PP_BUF_CACHE_LARGE(pp), purge);
1814 	}
1815 	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1816 		skmem_cache_reap_now(PP_KBFT_CACHE_DEF(pp), purge);
1817 	}
1818 	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1819 		skmem_cache_reap_now(PP_KBFT_CACHE_LARGE(pp), purge);
1820 	}
1821 }
1822 
1823 /*
1824  * Reap all of the configured caches in the arena, so that any excess amount
1825  * outside of their working sets gets released to their respective backing
1826  * regions.  If purging is specified, we empty the caches' working sets,
1827  * including everything that's cached at the CPU layer.
1828  */
1829 static void
1830 skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
1831 {
1832 	struct skmem_arena_nexus *arn;
1833 	struct skmem_arena_necp *arc;
1834 	struct kern_pbufpool *pp;
1835 
1836 	AR_LOCK_ASSERT_HELD(ar);
1837 
1838 	switch (ar->ar_type) {
1839 	case SKMEM_ARENA_TYPE_NEXUS:
1840 		arn = (struct skmem_arena_nexus *)ar;
1841 		if (arn->arn_schema_cache != NULL) {
1842 			skmem_cache_reap_now(arn->arn_schema_cache, purge);
1843 		}
1844 		if (arn->arn_ring_cache != NULL) {
1845 			skmem_cache_reap_now(arn->arn_ring_cache, purge);
1846 		}
1847 		if ((pp = arn->arn_rx_pp) != NULL) {
1848 			skmem_reap_pbufpool_caches(pp, purge);
1849 		}
1850 		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
1851 			skmem_reap_pbufpool_caches(pp, purge);
1852 		}
1853 		break;
1854 
1855 	case SKMEM_ARENA_TYPE_NECP:
1856 		arc = (struct skmem_arena_necp *)ar;
1857 		if (arc->arc_kstats_cache != NULL) {
1858 			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
1859 		}
1860 		break;
1861 
1862 	case SKMEM_ARENA_TYPE_SYSTEM:
1863 		break;
1864 	}
1865 }
1866 
1867 void
1868 skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
1869 {
1870 	AR_LOCK(ar);
1871 	skmem_arena_reap_locked(ar, purge);
1872 	AR_UNLOCK(ar);
1873 }
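
/*
 * Usage sketch (illustrative): the two reap modes differ only in the
 * purge flag passed down to skmem_cache_reap_now():
 *
 *	skmem_arena_reap(ar, FALSE);	... trim excess beyond working sets
 *	skmem_arena_reap(ar, TRUE);	... empty working sets and CPU caches
 */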
1874 
1875 #if SK_LOG
1876 SK_LOG_ATTRIBUTE
1877 static void
1878 skmem_arena_create_region_log(struct skmem_arena *ar)
1879 {
1880 	char label[32];
1881 	int i;
1882 
1883 	switch (ar->ar_type) {
1884 	case SKMEM_ARENA_TYPE_NEXUS:
1885 		SK_D("\"%s\" ar %p flags 0x%x rx_pp %p tx_pp %p",
1886 		    ar->ar_name, SK_KVA(ar), ar->ar_flags,
1887 		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
1888 		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
1889 		break;
1890 
1891 	case SKMEM_ARENA_TYPE_NECP:
1892 	case SKMEM_ARENA_TYPE_SYSTEM:
1893 		SK_D("\"%s\" ar %p flags 0x%x", ar->ar_name, SK_KVA(ar),
1894 		    ar->ar_flags);
1895 		break;
1896 	}
1897 
1898 	for (i = 0; i < SKMEM_REGIONS; i++) {
1899 		if (ar->ar_regions[i] == NULL) {
1900 			continue;
1901 		}
1902 
1903 		(void) snprintf(label, sizeof(label), "REGION_%s:",
1904 		    skmem_region_id2name(i));
1905 		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
1906 		    "o:[%4u x %6u -> %4u x %6u]", label,
1907 		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
1908 		    (uint32_t)AR_MEM_SEGCNT(ar, i),
1909 		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
1910 		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
1911 		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
1912 		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
1913 		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
1914 	}
1915 }
1916 #endif /* SK_LOG */
1917 
1918 static size_t
1919 skmem_arena_mib_get_stats(struct skmem_arena *ar, void *__sized_by(len) out,
1920     size_t len)
1921 {
1922 	size_t actual_space = sizeof(struct sk_stats_arena);
1923 	struct sk_stats_arena *__single sar;
1924 	struct skmem_arena_mmap_info *ami = NULL;
1925 	pid_t proc_pid;
1926 	int i;
1927 
1928 	if (out == NULL || len < actual_space) {
1929 		goto done;
1930 	}
1931 	sar = out;
1932 
1933 	AR_LOCK(ar);
1934 	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
1935 	    "%s", ar->ar_name);
1936 	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
1937 	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
1938 	i = 0;
1939 	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
1940 		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
1941 			struct kern_channel *__single ch;
1942 			ch = __unsafe_forge_single(struct kern_channel *,
1943 			    container_of(ami, struct kern_channel, ch_mmap));
1944 			proc_pid = ch->ch_pid;
1945 		} else {
1946 			ASSERT((ami->ami_arena->ar_type ==
1947 			    SKMEM_ARENA_TYPE_NECP) ||
1948 			    (ami->ami_arena->ar_type ==
1949 			    SKMEM_ARENA_TYPE_SYSTEM));
1950 			proc_pid =
1951 			    necp_client_get_proc_pid_from_arena_info(ami);
1952 		}
1953 		sar->sar_mapped_pids[i++] = proc_pid;
1954 		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
1955 			break;
1956 		}
1957 	}
1958 
1959 	for (i = 0; i < SKMEM_REGIONS; i++) {
1960 		struct skmem_region *skr = ar->ar_regions[i];
1961 		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];
1962 
1963 		if (skr == NULL) {
1964 			uuid_clear(*sreg_uuid);
1965 			continue;
1966 		}
1967 
1968 		uuid_copy(*sreg_uuid, skr->skr_uuid);
1969 	}
1970 	AR_UNLOCK(ar);
1971 
1972 done:
1973 	return actual_space;
1974 }
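
/*
 * Sizing-contract sketch (illustrative): the function returns the
 * space required for one record even when no buffer is supplied, so
 * a NULL buffer acts as a size probe; this is what lets the sysctl
 * handler below sum record sizes first and copy out second:
 *
 *	size_t need = skmem_arena_mib_get_stats(ar, NULL, 0);
 *	... allocate at least "need" bytes, then call again to fill ...
 */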
1975 
1976 static int
1977 skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
1978 {
1979 #pragma unused(arg1, arg2, oidp)
1980 	struct skmem_arena *ar;
1981 	size_t actual_space;
1982 	size_t buffer_space;
1983 	size_t allocated_space = 0;
1984 	caddr_t __sized_by(allocated_space) buffer = NULL;
1985 	caddr_t scan;
1986 	int error = 0;
1987 
1988 	if (!kauth_cred_issuser(kauth_cred_get())) {
1989 		return EPERM;
1990 	}
1991 
1992 	net_update_uptime();
1993 	buffer_space = req->oldlen;
1994 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
1995 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
1996 			buffer_space = SK_SYSCTL_ALLOC_MAX;
1997 		}
1998 		caddr_t temp;
1999 		temp = sk_alloc_data(buffer_space, Z_WAITOK, skmem_tag_arena_mib);
2000 		if (__improbable(temp == NULL)) {
2001 			return ENOBUFS;
2002 		}
2003 		buffer = temp;
2004 		allocated_space = buffer_space;
2005 	} else if (req->oldptr == USER_ADDR_NULL) {
2006 		buffer_space = 0;
2007 	}
2008 	actual_space = 0;
2009 	scan = buffer;
2010 
2011 	SKMEM_ARENA_LOCK();
2012 	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
2013 		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
2014 		if (scan != NULL) {
2015 			if (buffer_space < size) {
2016 				/* supplied buffer too small, stop copying */
2017 				error = ENOMEM;
2018 				break;
2019 			}
2020 			scan += size;
2021 			buffer_space -= size;
2022 		}
2023 		actual_space += size;
2024 	}
2025 	SKMEM_ARENA_UNLOCK();
2026 
2027 	if (actual_space != 0) {
2028 		int out_error = SYSCTL_OUT(req, buffer, actual_space);
2029 		if (out_error != 0) {
2030 			error = out_error;
2031 		}
2032 	}
2033 	if (buffer != NULL) {
2034 		sk_free_data_sized_by(buffer, allocated_space);
2035 	}
2036 
2037 	return error;
2038 }
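
/*
 * Userland sketch (illustrative; the MIB name shown is an assumption,
 * not taken from this file): the standard two-call pattern sizes the
 * buffer first, then fetches the records:
 *
 *	size_t len = 0;
 *	sysctlbyname("kern.skywalk.stats.arena", NULL, &len, NULL, 0);
 *	void *buf = malloc(len);
 *	sysctlbyname("kern.skywalk.stats.arena", buf, &len, NULL, 0);
 *	... len now holds the number of bytes copied out ...
 */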
2039 
2040 #if SK_LOG
2041 SK_NO_INLINE_ATTRIBUTE
2042 char *
2043 ar2str(const struct skmem_arena *ar, char *__counted_by(dsz)dst,
2044     size_t dsz)
2045 {
2046 	(void) sk_snprintf(dst, dsz, "%p %s flags 0x%b",
2047 	    SK_KVA(ar), ar->ar_name, ar->ar_flags, ARF_BITS);
2048 
2049 	return dst;
2050 }
2051 #endif /* SK_LOG */
2052