/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * SKMEM_ARENA_TYPE_NEXUS:
 *
 *   This arena represents the memory subsystem of a nexus adapter.  It
 *   consists of a collection of memory regions that are usable by the nexus,
 *   as well as the various caches for objects in those regions.
 *
 *       (1 per nexus adapter)
 *     +=======================+
 *     |      skmem_arena      |
 *     +-----------------------+              (backing regions)
 *     |     ar_regions[0]     |           +=======================+
 *     :          ...          : ------->> |     skmem_region      |===+
 *     |     ar_regions[n]     |           +=======================+   |===+
 *     +=======================+               +=======================+   |
 *     |     arn_{caches,pp}   | ---+              +=======================+
 *     +-----------------------+    |
 *     |     arn_stats_obj     |    |
 *     |     arn_flowadv_obj   |    |         (cache frontends)
 *     |     arn_nexusadv_obj  |    |      +=======================+
 *     +-----------------------+    +--->> |     skmem_cache       |===+
 *                                         +=======================+   |===+
 *                                             +=======================+   |
 *                                                 +=======================+
 *
 *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
 *   may be external to the nexus adapter, e.g. created by the driver or an
 *   external entity.  If not supplied, we create these regions along with
 *   the packet buffer pool ourselves.  The rest of the regions (unrelated
 *   to the packet buffer pool) are unique to the arena and are allocated at
 *   arena creation time.
 *
 *   An arena may be mapped to a user task/process as many times as needed.
 *   The result of each mapping is a contiguous range within the address
 *   space of that task, indicated by the span [ami_mapaddr, ami_mapaddr +
 *   ami_mapsize).  This is achieved by leveraging the mapper memory object
 *   ar_mapper that "stitches" the disjoint segments together.  Only
 *   user-mappable regions, i.e. those marked with SKR_MODE_MMAPOK, will be
 *   included in this span.
 *
 *   Nexus adapters that are eligible for defunct will trigger the arena to
 *   undergo memory redirection for all regions except those that are marked
 *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
 *   to the adapter are defunct.  Upon completion, those redirected regions
 *   will be torn down in order to reduce their memory footprints.  When this
 *   happens the adapter and its arena are no longer active or in service.
 *
 *   The arena exposes caches for allocating and freeing most region objects.
 *   These slab-allocator based caches act as front-ends to the regions; only
 *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
 *   others simply utilize skmem_cache for slab-based allocations.
 *
 *   Certain regions contain singleton objects that are simple enough to not
 *   require the slab allocator, such as the ones used for statistics and flow
 *   advisories.  Because of this, we directly allocate from those regions
 *   and store the objects in the arena.
 *
 * SKMEM_ARENA_TYPE_NECP:
 *
 *   This arena represents the memory subsystem of an NECP file descriptor
 *   object.  It consists of a memory region for per-flow statistics, as well
 *   as a cache front-end for that region.
 *
 * SKMEM_ARENA_TYPE_SYSTEM:
 *
 *   This arena represents general, system-wide objects.  It currently
 *   consists of the sysctls region that's created once at init time.
 */
/* END CSTYLED */

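/*
 * Typical arena lifecycle (illustrative sketch only, based on the
 * routines defined below; not a verbatim excerpt of any caller):
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    pp_truncated_buf, kernel_only, nxv, &err);
 *	...
 *	err = skmem_arena_mmap(ar, p, ami);	// map into a user task
 *	...
 *	skmem_arena_munmap(ar, ami);		// unmap from the user task
 *	skmem_arena_release(ar);		// last release destroys it
 */
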
#include <skywalk/os_skywalk_private.h>
#include <net/necp.h>

static void skmem_arena_destroy(struct skmem_arena *);
static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
static int skmem_arena_create_finalize(struct skmem_arena *);
static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
static struct skmem_arena *skmem_arena_alloc(skmem_arena_type_t,
    const char *);
static void skmem_arena_free(struct skmem_arena *);
static void skmem_arena_retain_locked(struct skmem_arena *);
static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
    struct skmem_arena_mmap_info *);
#if SK_LOG
static void skmem_arena_create_region_log(struct skmem_arena *);
#endif /* SK_LOG */
static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
    "Skywalk arena statistics");

static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);

static TAILQ_HEAD(, skmem_arena) skmem_arena_head;

#define SKMEM_ARENA_LOCK()                      \
	lck_mtx_lock(&skmem_arena_lock)
#define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKMEM_ARENA_UNLOCK()                    \
	lck_mtx_unlock(&skmem_arena_lock)

#define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
static ZONE_DEFINE(ar_nexus_zone, SKMEM_ZONE_PREFIX ".mem.arena.nexus",
    AR_NEXUS_SIZE, ZC_ZFREE_CLEARMEM);

#define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
static ZONE_DEFINE(ar_necp_zone, SKMEM_ZONE_PREFIX ".mem.arena.necp",
    AR_NECP_SIZE, ZC_ZFREE_CLEARMEM);

#define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
static ZONE_DEFINE(ar_system_zone, SKMEM_ZONE_PREFIX ".mem.arena.system",
    AR_SYSTEM_SIZE, ZC_ZFREE_CLEARMEM);

#define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
static kern_allocation_name_t skmem_tag_arena_mib;

void
skmem_arena_init(void)
{
	_CASSERT(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
	_CASSERT(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
	_CASSERT(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);

	TAILQ_INIT(&skmem_arena_head);

	ASSERT(skmem_tag_arena_mib == NULL);
	skmem_tag_arena_mib =
	    kern_allocation_name_allocate(SKMEM_TAG_ARENA_MIB, 0);
	ASSERT(skmem_tag_arena_mib != NULL);
}

void
skmem_arena_fini(void)
{
	if (skmem_tag_arena_mib != NULL) {
		kern_allocation_name_release(skmem_tag_arena_mib);
		skmem_tag_arena_mib = NULL;
	}
}

SK_NO_INLINE_ATTRIBUTE
static int
skmem_arena_sd_setup(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
    boolean_t kernel_only, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
	const char *name = na->na_name;
	char cname[64];
	skmem_region_id_t usd_type, ksd_type;
	int err = 0;

	if (tx) {
		usd_type = SKMEM_REGION_TXAUSD;
		ksd_type = SKMEM_REGION_TXAKSD;
		cachep = &arn->arn_txaksd_cache;
	} else {
		usd_type = SKMEM_REGION_RXFUSD;
		ksd_type = SKMEM_REGION_RXFKSD;
		cachep = &arn->arn_rxfksd_cache;
	}
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	if (ksd_skr == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
		    "create %s region", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
		err = ENOMEM;
		goto failed;
	}
	ar->ar_regions[ksd_type] = ksd_skr;
	if (!kernel_only) {
		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
		    NULL, NULL);
		if (usd_skr == NULL) {
			err = ENOMEM;
			goto failed;
		}
		ar->ar_regions[usd_type] = usd_skr;
		skmem_region_mirror(ksd_skr, usd_skr);
	}
	snprintf(cname, sizeof(cname), tx ? "txa_ksd.%s" : "rxf_ksd.%s", name);
	ASSERT(ar->ar_regions[ksd_type] != NULL);
	*cachep = skmem_cache_create(cname,
	    srp[ksd_type].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[ksd_type], SKMEM_CR_NOMAGAZINES);
	if (*cachep == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		err = ENOMEM;
		goto failed;
	}
	return 0;

failed:
	if (ksd_skr != NULL) {
		skmem_region_release(ksd_skr);
		ar->ar_regions[ksd_type] = NULL;
	}
	if (usd_skr != NULL) {
		/*
		 * Decrement the refcnt incremented by skmem_region_mirror().
		 * This is not needed when skmem_cache_create() succeeds,
		 * because skmem_cache_destroy() does the release.
		 */
		skmem_region_release(usd_skr);

		/* decrement the region's own refcnt */
		skmem_region_release(usd_skr);
		ar->ar_regions[usd_type] = NULL;
	}
	return err;
}

SK_NO_INLINE_ATTRIBUTE
static void
skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region **ksd_rp, **usd_rp;

	if (tx) {
		cachep = &arn->arn_txaksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
	} else {
		cachep = &arn->arn_rxfksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
	}
	if (*cachep != NULL) {
		skmem_cache_destroy(*cachep);
		*cachep = NULL;
	}
	if (*usd_rp != NULL) {
		skmem_region_release(*usd_rp);
		*usd_rp = NULL;
	}
	if (*ksd_rp != NULL) {
		skmem_region_release(*ksd_rp);
		*ksd_rp = NULL;
	}
}

static bool
skmem_arena_pp_setup(struct skmem_arena *ar,
    struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
    struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
    boolean_t kernel_only, boolean_t pp_truncated_buf)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;

	if (rx_pp == NULL && tx_pp == NULL) {
		uint32_t ppcreatef = 0;
		if (pp_truncated_buf) {
			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
		}
		if (kernel_only) {
			ppcreatef |= PPCREATEF_KERNEL_ONLY;
		}
		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
		}
		/* callee retains pp upon success */
		rx_pp = pp_create(name, &srp[SKMEM_REGION_BUF],
		    &srp[SKMEM_REGION_KMD], &srp[SKMEM_REGION_UMD],
		    &srp[SKMEM_REGION_KBFT], &srp[SKMEM_REGION_UBFT], NULL,
		    NULL, NULL, NULL, NULL, ppcreatef);
		if (rx_pp == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
			return false;
		}
		pp_retain(rx_pp);
		tx_pp = rx_pp;
	} else {
		if (rx_pp == NULL) {
			rx_pp = tx_pp;
		} else if (tx_pp == NULL) {
			tx_pp = rx_pp;
		}

		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
		ASSERT(!(!kernel_only &&
		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
		pp_retain(rx_pp);
		pp_retain(tx_pp);
	}

	arn->arn_rx_pp = rx_pp;
	arn->arn_tx_pp = tx_pp;
	if (rx_pp == tx_pp) {
		skmem_region_retain(rx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_BUF] = rx_pp->pp_buf_region;
		ar->ar_regions[SKMEM_REGION_RXBUF] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_KBFT] =
			    rx_pp->pp_kbft_region;
		}
		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
	} else {
		ASSERT(kernel_only); /* split userspace pools not supported */
		ar->ar_regions[SKMEM_REGION_BUF] = NULL;
		skmem_region_retain(rx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_RXBUF] = rx_pp->pp_buf_region;
		skmem_region_retain(tx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_TXBUF] = tx_pp->pp_buf_region;
		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
		skmem_region_retain(tx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_RXKBFT] =
			    rx_pp->pp_kbft_region;
		}
		if (tx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
			skmem_region_retain(tx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_TXKBFT] =
			    tx_pp->pp_kbft_region;
		}
	}

	if (kernel_only) {
		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
			ASSERT(PP_KERNEL_ONLY(rx_pp));
			ASSERT(PP_KERNEL_ONLY(tx_pp));
			ASSERT(rx_pp->pp_umd_region == NULL);
			ASSERT(tx_pp->pp_umd_region == NULL);
			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(rx_pp->pp_ubft_region == NULL);
			ASSERT(tx_pp->pp_ubft_region == NULL);
			if (rx_pp->pp_kbft_region != NULL) {
				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
			if (tx_pp->pp_kbft_region != NULL) {
				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
		}
	} else {
		ASSERT(rx_pp == tx_pp);
		ASSERT(!PP_KERNEL_ONLY(rx_pp));
		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
		skmem_region_retain(rx_pp->pp_umd_region);
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
			ASSERT(rx_pp->pp_ubft_region != NULL);
			ASSERT(rx_pp->pp_ubft_region->skr_mode &
			    SKR_MODE_MIRRORED);
			ar->ar_regions[SKMEM_REGION_UBFT] =
			    rx_pp->pp_ubft_region;
			skmem_region_retain(rx_pp->pp_ubft_region);
		}
	}

	arn->arn_md_type = rx_pp->pp_md_type;
	arn->arn_md_subtype = rx_pp->pp_md_subtype;
	return true;
}
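
/*
 * Summary of the region layout established above (illustrative only):
 *
 *   shared pool (rx_pp == tx_pp):  BUF and KMD (and KBFT, if present)
 *       are retained; the split RXBUF/TXBUF, RXKMD/TXKMD and
 *       RXKBFT/TXKBFT slots stay NULL.  If not kernel-only, UMD (and
 *       UBFT, if present) are additionally retained.
 *
 *   split pools (kernel-only):     RXBUF/TXBUF and RXKMD/TXKMD (and
 *       RXKBFT/TXKBFT, if present) are retained; the shared BUF, KMD
 *       and KBFT slots stay NULL.
 */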

/*
 * Create a nexus adapter arena.
 */
struct skmem_arena *
skmem_arena_create_for_nexus(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
    struct kern_pbufpool **rx_pp, boolean_t pp_truncated_buf,
    boolean_t kernel_only, struct kern_nexus_advisory *nxv, int *perr)
{
#define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
	struct skmem_arena_nexus *arn;
	struct skmem_arena *ar;
	char cname[64];
	uint32_t i;
	const char *name = na->na_name;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NEXUS, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);
	arn = (struct skmem_arena_nexus *)ar;

	/* these regions must not be readable/writeable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);

	/* these regions must be read-only */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY);
		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY);
	} else {
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY));
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY));
	}

	/* these regions must be user-mappable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);

	/* these must not be user-mappable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));

	/* these regions must be shareable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF) & SKMEM_REGION_CR_SHAREOK);

	/* these regions must not be shareable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));

	/* these must stay active */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);

	/* no kstats for nexus */
	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);

	AR_LOCK(ar);
	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
	    (tx_pp ? *tx_pp : NULL), kernel_only, pp_truncated_buf)) {
		goto failed;
	}

	if (nxv != NULL && nxv->nxv_reg != NULL) {
		struct skmem_region *svr = nxv->nxv_reg;

		ASSERT(svr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(svr->skr_seg_max_cnt == 1);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = svr;
		skmem_region_retain(svr);

		ASSERT(nxv->nxv_adv != NULL);
		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
			VERIFY(nxv->netif_nxv_adv->nna_version ==
			    NX_NETIF_ADVISORY_CURRENT_VERSION);
		} else {
			panic_plain("%s: invalid advisory type %d",
			    __func__, nxv->nxv_adv_type);
			/* NOTREACHED */
		}
		arn->arn_nexusadv_obj = nxv->nxv_adv;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
		goto failed;
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
		goto failed;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		/* skip if already created */
		if (ar->ar_regions[i] != NULL) {
			continue;
		}

		/* skip external regions from packet pool */
		if (skmem_region_for_pp(i)) {
			continue;
		}

		/* skip slot descriptor regions */
		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
			continue;
		}

		/* skip if region is configured to be empty */
		if (srp[i].srp_c_obj_cnt == 0) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		ASSERT(srp[i].srp_id == i);

		/*
		 * Skip {SCHEMA, RING, GUARD} for kernel-only arena.  Note
		 * that this assumes a kernel-only arena is always used
		 * for kernel-only nexus adapters (never used directly by
		 * a user process.)
		 *
		 * XXX [email protected] - see comments in kern_pbufpool_create().
		 * We need to revisit this logic for "direct channel" access,
		 * perhaps via a separate adapter flag.
		 */
		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
		    i == SKMEM_REGION_GUARD_TAIL)) {
			continue;
		}

		/* not for nexus, or for us to create here */
		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
		ASSERT(i != SKMEM_REGION_NEXUSADV);
		ASSERT(i != SKMEM_REGION_SYSCTLS);
		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
		ASSERT(i != SKMEM_REGION_KSTATS);
		ASSERT(i != SKMEM_REGION_INTRINSIC);

		/* otherwise create it */
		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
			    "create %s region", ar->ar_name, SK_KVA(ar),
			    ar->ar_flags, ARF_BITS, srp[i].srp_name);
			goto failed;
		}
	}

	/* create skmem_cache for schema (without magazines) */
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
		(void) snprintf(cname, sizeof(cname), "schema.%s", name);
		if ((arn->arn_schema_cache = skmem_cache_create(cname,
		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL, NULL,
		    NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
		    SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
			    cname);
			goto failed;
		}
	}

	/* create skmem_cache for rings (without magazines) */
	(void) snprintf(cname, sizeof(cname), "ring.%s", name);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
	    (arn->arn_ring_cache = skmem_cache_create(cname,
	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_RING], SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	/*
	 * If the stats region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
		struct skmem_region *str = ar->ar_regions[SKMEM_REGION_USTATS];

		/* no kstats for nexus */
		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_stats_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);

	/*
	 * If the flowadv region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
		struct skmem_region *str =
		    ar->ar_regions[SKMEM_REGION_FLOWADV];

		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_flowadv_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

	/* caller didn't give us one, but would like us to return it? */
	if (rx_pp != NULL && *rx_pp == NULL) {
		*rx_pp = arn->arn_rx_pp;
		pp_retain(*rx_pp);
	}
	if (tx_pp != NULL && *tx_pp == NULL) {
		*tx_pp = arn->arn_tx_pp;
		pp_retain(*tx_pp);  /* for caller */
	}

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
#undef SRP_CFLAGS
}
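
/*
 * Example (hypothetical caller, for illustration only): create a
 * kernel-only nexus arena and let it build the packet pool for us.
 * Passing the address of a NULL kern_pbufpool pointer asks the routine
 * to return the pool it created, with a reference held for the caller:
 *
 *	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
 *	int err;
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    FALSE, TRUE, NULL, &err);
 *	if (ar == NULL)
 *		return err;	// arena and pools were cleaned up
 *	...
 *	pp_release(rx_pp);
 *	pp_release(tx_pp);
 *	skmem_arena_release(ar);
 */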

void
skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
{
	struct skmem_arena *ar = &arn->arn_cmn;

	AR_LOCK(ar);
	arn->arn_ksd_nodefunct += cnt;
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	AR_UNLOCK(ar);
}

boolean_t
skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	boolean_t idle;

	AR_LOCK(ar);
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	idle = (arn->arn_ksd_nodefunct == 0);
	AR_UNLOCK(ar);

	return idle;
}

static void
skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	struct skmem_region *skr;
	int i;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* these should never be set for nexus arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	if (arn->arn_stats_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_stats_obj, NULL);
		arn->arn_stats_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(arn->arn_stats_obj == NULL);

	if (arn->arn_flowadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_flowadv_obj, NULL);
		arn->arn_flowadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(arn->arn_flowadv_obj == NULL);

	if (arn->arn_nexusadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so just nullify it */
		arn->arn_nexusadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(arn->arn_nexusadv_obj == NULL);

	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
	if (arn->arn_rx_pp != NULL) {
		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
			skmem_region_id_t reg = skmem_pp_region_ids[i];
			skr = ar->ar_regions[reg];
			if (skr != NULL) {
				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
				skmem_region_release(skr);
				ar->ar_regions[reg] = NULL;
			}
		}
		pp_release(arn->arn_rx_pp);
		pp_release(arn->arn_tx_pp);
		arn->arn_rx_pp = NULL;
		arn->arn_tx_pp = NULL;
	}
	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}
	ASSERT(arn->arn_rx_pp == NULL);
	ASSERT(arn->arn_tx_pp == NULL);

	if (arn->arn_ring_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_RING];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_ring_cache);
		arn->arn_ring_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_RING] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(arn->arn_ring_cache == NULL);

	/*
	 * Stop here if we're in the defunct context, and we're asked
	 * to keep the slot descriptor regions alive as they are still
	 * being referred to by the nexus owner (driver).
	 */
	if (defunct && arn->arn_ksd_nodefunct != 0) {
		ASSERT(arn->arn_ksd_nodefunct > 0);
		return;
	}

	ASSERT(arn->arn_ksd_nodefunct == 0);
	skmem_arena_sd_teardown(ar, TRUE);
	skmem_arena_sd_teardown(ar, FALSE);

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}
	if (arn->arn_schema_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_schema_cache);
		arn->arn_schema_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(arn->arn_schema_cache == NULL);

	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
}

/*
 * Create an NECP arena.
 */
struct skmem_arena *
skmem_arena_create_for_necp(const char *name,
    struct skmem_region_params *srp_ustats,
    struct skmem_region_params *srp_kstats, int *perr)
{
	struct skmem_arena_necp *arc;
	struct skmem_arena *ar;
	char cname[64];

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NECP, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);
	arc = (struct skmem_arena_necp *)ar;

	/*
	 * Must be stats region, and must be user-mappable;
	 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
	 * as the client might want multi-segment mode.
	 */
	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(srp_ustats->srp_c_obj_size != 0);
	ASSERT(srp_kstats->srp_c_obj_size != 0);
	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);

	AR_LOCK(ar);

	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
	    srp_ustats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_ustats->srp_name);
		goto failed;
	}

	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
	    srp_kstats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_kstats->srp_name);
		goto failed;
	}

	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
	    ar->ar_regions[SKMEM_REGION_USTATS]);

	/* create skmem_cache for kernel stats (without magazines) */
	(void) snprintf(cname, sizeof(cname), "kstats.%s", name);
	if ((arc->arc_kstats_cache = skmem_cache_create(cname,
	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_KSTATS],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for NECP arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
{
#pragma unused(defunct)
	struct skmem_arena *ar = &arc->arc_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);

	/* these must never be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	if (arc->arc_kstats_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arc->arc_kstats_cache);
		arc->arc_kstats_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;

		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(arc->arc_kstats_cache == NULL);
}

/*
 * Given an arena, return its NECP variant (if applicable).
 */
struct skmem_arena_necp *
skmem_arena_necp(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
		return NULL;
	}

	return (struct skmem_arena_necp *)ar;
}

/*
 * Create a System arena.
 */
struct skmem_arena *
skmem_arena_create_for_system(const char *name, int *perr)
{
	struct skmem_region *skrsys;
	struct skmem_arena_system *ars;
	struct skmem_arena *ar;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_SYSTEM, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);
	ars = (struct skmem_arena_system *)ar;

	AR_LOCK(ar);
	/* retain system-wide sysctls region */
	skrsys = skmem_get_sysctls_region();
	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
	    SKR_MODE_SHAREOK)) ==
	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
	    SKR_MODE_MONOLITHIC));
	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
	skmem_region_retain(skrsys);

	/* object is valid as long as the sysctls region is retained */
	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
	ASSERT(ars->ars_sysctls_obj != NULL);
	ASSERT(ars->ars_sysctls_objsize != 0);

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for system arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
{
	struct skmem_arena *ar = &ars->ars_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);

	/* these must never be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* nothing to do here for now during defunct, just return */
	if (defunct) {
		return;
	}

	if (ars->ars_sysctls_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so don't free it */
		ars->ars_sysctls_obj = NULL;
		ars->ars_sysctls_objsize = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ars->ars_sysctls_obj == NULL);
	ASSERT(ars->ars_sysctls_objsize == 0);
}

/*
 * Given an arena, return its System variant (if applicable).
 */
struct skmem_arena_system *
skmem_arena_system(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
		return NULL;
	}

	return (struct skmem_arena_system *)ar;
}

void *
skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_obj;
}

size_t
skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_objsize;
}

/*
 * Destroy an arena.
 */
static void
skmem_arena_destroy(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);

	ASSERT(ar->ar_refcnt == 0);
	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
		AR_UNLOCK(ar);
		SKMEM_ARENA_LOCK();
		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
		SKMEM_ARENA_UNLOCK();
		AR_LOCK(ar);
		ASSERT(ar->ar_refcnt == 0);
	}

	/* teardown all remaining memory regions and associated resources */
	skmem_arena_teardown(ar, FALSE);

	if (ar->ar_ar != NULL) {
		IOSKArenaDestroy(ar->ar_ar);
		ar->ar_ar = NULL;
	}

	if (ar->ar_flags & ARF_ACTIVE) {
		ar->ar_flags &= ~ARF_ACTIVE;
	}

	AR_UNLOCK(ar);

	skmem_arena_free(ar);
}

/*
 * Tear down (or defunct) an arena.
 */
static void
skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
{
	uint32_t i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
		    defunct);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	/* take care of any remaining ones */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		skmem_region_release(ar->ar_regions[i]);
		ar->ar_regions[i] = NULL;
	}
}

static int
skmem_arena_create_finalize(struct skmem_arena *ar)
{
	IOSKRegionRef reg[SKMEM_REGIONS];
	uint32_t i, regcnt = 0;
	int err = 0;

	AR_LOCK_ASSERT_HELD(ar);

	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	/*
	 * Prepare an array of regions that can be mapped to user task;
	 * exclude regions that aren't eligible for user task mapping.
	 */
	bzero(&reg, sizeof(reg));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
			continue;
		}

		ASSERT(skr->skr_reg != NULL);
		reg[regcnt++] = skr->skr_reg;
	}
	ASSERT(regcnt != 0);

	/*
	 * Create backing IOSKArena handle.
	 */
	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
	if (ar->ar_ar == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
		    "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, regcnt);
		err = ENOMEM;
		goto failed;
	}

	ar->ar_flags |= ARF_ACTIVE;

failed:
	return err;
}

static struct skmem_arena *
skmem_arena_alloc(skmem_arena_type_t type, const char *name)
{
	const char *ar_str = NULL;
	struct zone *ar_zone = NULL;
	struct skmem_arena *ar;
	size_t ar_zsize = 0;

	switch (type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		ar_zone = ar_nexus_zone;
		ar_zsize = AR_NEXUS_SIZE;
		ar_str = "nexus";
		break;

	case SKMEM_ARENA_TYPE_NECP:
		ar_zone = ar_necp_zone;
		ar_zsize = AR_NECP_SIZE;
		ar_str = "necp";
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		ar_zone = ar_system_zone;
		ar_zsize = AR_SYSTEM_SIZE;
		ar_str = "system";
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	ar = zalloc_flags(ar_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar->ar_type = type;
	ar->ar_zsize = ar_zsize;
	ar->ar_zone = ar_zone;

	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
	    LCK_ATTR_NULL);
	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);

	return ar;
}

static void
skmem_arena_free(struct skmem_arena *ar)
{
#if DEBUG || DEVELOPMENT
	ASSERT(ar->ar_refcnt == 0);
	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
	ASSERT(ar->ar_ar == NULL);
	ASSERT(ar->ar_mapcnt == 0);
	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
		ASSERT(ar->ar_regions[i] == NULL);
	}
#endif /* DEBUG || DEVELOPMENT */

	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
	zfree(ar->ar_zone, ar);
}

/*
 * Retain an arena.
 */
__attribute__((always_inline))
static inline void
skmem_arena_retain_locked(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);
	ar->ar_refcnt++;
	ASSERT(ar->ar_refcnt != 0);
}

void
skmem_arena_retain(struct skmem_arena *ar)
{
	AR_LOCK(ar);
	skmem_arena_retain_locked(ar);
	AR_UNLOCK(ar);
}

/*
 * Release (and potentially destroy) an arena.
 */
__attribute__((always_inline))
static inline boolean_t
skmem_arena_release_locked(struct skmem_arena *ar)
{
	boolean_t lastref = FALSE;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_refcnt != 0);
	if (--ar->ar_refcnt == 0) {
		skmem_arena_destroy(ar);
		lastref = TRUE;
	} else {
		lastref = FALSE;
	}

	return lastref;
}

boolean_t
skmem_arena_release(struct skmem_arena *ar)
{
	boolean_t lastref;

	AR_LOCK(ar);
	/* unlock only if this isn't the last reference */
	if (!(lastref = skmem_arena_release_locked(ar))) {
		AR_UNLOCK(ar);
	}

	return lastref;
}
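
/*
 * Note for callers (illustrative only): the last release destroys the
 * arena (and its lock), so `ar' must not be dereferenced once
 * skmem_arena_release() returns TRUE:
 *
 *	if (skmem_arena_release(ar)) {
 *		ar = NULL;	// destroyed; lock already dropped
 *	}
 */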
1481 
1482 /*
1483  * Map an arena to the task's address space.
1484  */
1485 int
skmem_arena_mmap(struct skmem_arena * ar,struct proc * p,struct skmem_arena_mmap_info * ami)1486 skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1487     struct skmem_arena_mmap_info *ami)
1488 {
1489 	task_t task = proc_task(p);
1490 	IOReturn ioerr;
1491 	int err = 0;
1492 
1493 	ASSERT(task != kernel_task && task != TASK_NULL);
1494 	ASSERT(ami->ami_arena == NULL);
1495 	ASSERT(ami->ami_mapref == NULL);
1496 	ASSERT(ami->ami_maptask == TASK_NULL);
1497 	ASSERT(!ami->ami_redirect);
1498 
1499 	AR_LOCK(ar);
1500 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1501 		err = ENODEV;
1502 		goto failed;
1503 	}
1504 
1505 	ASSERT(ar->ar_ar != NULL);
1506 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1507 		err = ENOMEM;
1508 		goto failed;
1509 	}
1510 
1511 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1512 	    &ami->ami_mapsize);
1513 	VERIFY(ioerr == kIOReturnSuccess);
1514 
1515 	ami->ami_arena = ar;
1516 	skmem_arena_retain_locked(ar);
1517 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1518 
1519 	ami->ami_maptask = task;
1520 	ar->ar_mapcnt++;
1521 	if (ar->ar_mapcnt == 1) {
1522 		ar->ar_mapsize = ami->ami_mapsize;
1523 	}
1524 
1525 	ASSERT(ami->ami_mapref != NULL);
1526 	ASSERT(ami->ami_arena == ar);
1527 	AR_UNLOCK(ar);
1528 
1529 	return 0;
1530 
1531 failed:
1532 	AR_UNLOCK(ar);
1533 	skmem_arena_munmap(ar, ami);
1534 	VERIFY(err != 0);
1535 
1536 	return err;
1537 }
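
/*
 * Illustrative mmap/munmap pairing (sketch; assumes the caller embeds
 * a zeroed struct skmem_arena_mmap_info in its own state, the way
 * kern_channel embeds ch_mmap).  On failure, skmem_arena_mmap() has
 * already cleaned up after itself via skmem_arena_munmap().
 *
 *	struct skmem_arena_mmap_info ami;
 *	int err;
 *
 *	bzero(&ami, sizeof(ami));
 *	if ((err = skmem_arena_mmap(ar, p, &ami)) == 0) {
 *		... [ami.ami_mapaddr, ami.ami_mapaddr + ami.ami_mapsize)
 *		    is now a contiguous range in the task's space ...
 *		skmem_arena_munmap(ar, &ami);
 *	}
 */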
1538 
1539 /*
1540  * Remove arena's memory mapping from task's address space (common code).
1541  * Returns true if caller needs to perform a deferred defunct.
1542  */
1543 static boolean_t
1544 skmem_arena_munmap_common(struct skmem_arena *ar,
1545     struct skmem_arena_mmap_info *ami)
1546 {
1547 	boolean_t need_defunct = FALSE;
1548 
1549 	AR_LOCK(ar);
1550 	if (ami->ami_mapref != NULL) {
1551 		IOSKMapperDestroy(ami->ami_mapref);
1552 		ami->ami_mapref = NULL;
1553 
1554 		VERIFY(ar->ar_mapcnt != 0);
1555 		ar->ar_mapcnt--;
1556 		if (ar->ar_mapcnt == 0) {
1557 			ar->ar_mapsize = 0;
1558 		}
1559 
1560 		VERIFY(ami->ami_arena == ar);
1561 		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1562 		    ami_link);
1563 
1564 		/*
1565 		 * We expect the caller to hold an extra reference
1566 		 * on the arena, in addition to the one in mmap_info.
1567 		 */
1568 		VERIFY(ar->ar_refcnt > 1);
1569 		(void) skmem_arena_release_locked(ar);
1570 		ami->ami_arena = NULL;
1571 
1572 		if (ami->ami_redirect) {
1573 			/*
1574 			 * This mapper has been redirected; decrement
1575 			 * the redirect count associated with it.
1576 			 */
1577 			VERIFY(ar->ar_maprdrcnt != 0);
1578 			ar->ar_maprdrcnt--;
1579 		} else if (ar->ar_maprdrcnt != 0 &&
1580 		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
1581 		 * There are other mappers for this arena that have
1582 			 * The are other mappers for this arena that have
1583 			 * all been redirected, but the arena wasn't marked
1584 			 * inactive by skmem_arena_redirect() last time since
1585 			 * this particular mapper that we just destroyed
1586 			 * was using it.  Now that it's gone, finish the
1587 			 * postponed work below once we return to caller.
1588 			 */
1589 			ASSERT(ar->ar_flags & ARF_ACTIVE);
1590 			ar->ar_flags &= ~ARF_ACTIVE;
1591 			need_defunct = TRUE;
1592 		}
1593 	}
1594 	ASSERT(ami->ami_mapref == NULL);
1595 	ASSERT(ami->ami_arena == NULL);
1596 
1597 	ami->ami_maptask = TASK_NULL;
1598 	ami->ami_mapaddr = 0;
1599 	ami->ami_mapsize = 0;
1600 	ami->ami_redirect = FALSE;
1601 
1602 	AR_UNLOCK(ar);
1603 
1604 	return need_defunct;
1605 }
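
/*
 * Worked example of the deferred defunct above: with three mappers of
 * which two have been redirected, skmem_arena_mredirect() left
 * ARF_ACTIVE set (maprdrcnt 2 != mapcnt 3).  When the remaining
 * non-redirected mapper is destroyed here, mapcnt drops to 2 and now
 * equals maprdrcnt, so the arena is marked inactive and the caller is
 * told to finish the postponed defunct.
 */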
1606 
1607 /*
1608  * Remove arena's memory mapping from task's address space (channel version).
1609  * Will perform a deferred defunct if needed.
1610  */
1611 void
1612 skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1613 {
1614 	SK_LOCK_ASSERT_HELD();
1615 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1616 
1617 	/*
1618 	 * If this channel was holding the last active reference
1619 	 * on the arena, and there are other defunct channels
1620 	 * pointing to that arena, perform the actual arena
1621 	 * defunct now.
1622 	 */
1623 	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1624 		struct kern_nexus *nx = ch->ch_nexus;
1625 		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1626 
1627 		/*
1628 		 * Similar to kern_channel_defunct(), where we let the
1629 		 * domain provider complete the defunct.  At this point
1630 		 * both sk_lock and the channel locks are held, and so
1631 		 * we indicate that to the callee.
1632 		 */
1633 		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1634 		    nx, ch, TRUE);
1635 	}
1636 }
1637 
1638 /*
1639  * Remove arena's memory mapping from task's address space (generic).
1640  * This routine should only be called on non-channel related arenas.
1641  */
1642 void
1643 skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
1644 {
1645 	(void) skmem_arena_munmap_common(ar, ami);
1646 }
1647 
1648 /*
1649  * Redirect eligible memory regions in the task's memory map so that
1650  * they get overwritten and backed with anonymous (zero-filled) pages.
1651  */
1652 int
1653 skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
1654     struct proc *p, boolean_t *need_defunct)
1655 {
1656 #pragma unused(p)
1657 	int err = 0;
1658 
1659 	*need_defunct = FALSE;
1660 
1661 	AR_LOCK(ar);
1662 	ASSERT(ar->ar_ar != NULL);
1663 	if (ami->ami_redirect) {
1664 		err = EALREADY;
1665 	} else if (ami->ami_mapref == NULL) {
1666 		err = ENXIO;
1667 	} else {
1668 		VERIFY(ar->ar_mapcnt != 0);
1669 		ASSERT(ar->ar_flags & ARF_ACTIVE);
1670 		VERIFY(ami->ami_arena == ar);
1671 		/*
1672 		 * This effectively overwrites the mappings for all
1673 		 * redirectable memory regions (i.e. those without the
1674 		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
1675 		 * protection flags.  Accesses to these regions will be
1676 		 * redirected to anonymous, zero-filled pages.
1677 		 */
1678 		IOSKMapperRedirect(ami->ami_mapref);
1679 		ami->ami_redirect = TRUE;
1680 
1681 		/*
1682 		 * Mark the arena as inactive if all mapper instances are
1683 		 * redirected; otherwise, we do this later during unmap.
1684 		 * Once inactive, the arena will not allow further mmap,
1685 		 * and it is ready to be defunct later.
1686 		 */
1687 		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
1688 			ar->ar_flags &= ~ARF_ACTIVE;
1689 			*need_defunct = TRUE;
1690 		}
1691 	}
1692 	AR_UNLOCK(ar);
1693 
1694 	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
1695 	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
1696 	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
1697 	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
1698 	    *need_defunct, err);
1699 
1700 	return err;
1701 }
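
/*
 * Illustrative redirect-then-defunct sequence (sketch): once the last
 * mapper of an arena has been redirected, *need_defunct asks the
 * caller to complete the defunct.
 *
 *	boolean_t need_defunct;
 *
 *	if (skmem_arena_mredirect(ar, ami, p, &need_defunct) == 0 &&
 *	    need_defunct) {
 *		(void) skmem_arena_defunct(ar);
 *	}
 */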
1702 
1703 /*
1704  * Defunct an arena.
1705  */
1706 int
1707 skmem_arena_defunct(struct skmem_arena *ar)
1708 {
1709 	AR_LOCK(ar);
1710 
1711 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags 0x%b", ar->ar_name,
1712 	    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1713 
1714 	if (ar->ar_flags & ARF_DEFUNCT) {
1715 		AR_UNLOCK(ar);
1716 		return EALREADY;
1717 	} else if (ar->ar_flags & ARF_ACTIVE) {
1718 		AR_UNLOCK(ar);
1719 		return EBUSY;
1720 	}
1721 
1722 	/* purge the caches now */
1723 	skmem_arena_reap_locked(ar, TRUE);
1724 
1725 	/* teardown eligible memory regions and associated resources */
1726 	skmem_arena_teardown(ar, TRUE);
1727 
1728 	ar->ar_flags |= ARF_DEFUNCT;
1729 
1730 	AR_UNLOCK(ar);
1731 
1732 	return 0;
1733 }
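
/*
 * Arena defunct lifecycle, as enforced above (sketch):
 *
 *	ARF_ACTIVE --[all mappers redirected/unmapped]--> inactive
 *	inactive ---[skmem_arena_defunct()]-------------> ARF_DEFUNCT
 *
 * skmem_arena_defunct() returns EBUSY while ARF_ACTIVE is still set,
 * EALREADY once ARF_DEFUNCT has been set, and 0 on the single
 * transition in between.
 */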
1734 
1735 /*
1736  * Retrieve total and in-use memory statistics of regions in the arena.
1737  */
1738 void
1739 skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
1740     uint64_t *mem_inuse)
1741 {
1742 	uint32_t i;
1743 
1744 	if (mem_total != NULL) {
1745 		*mem_total = 0;
1746 	}
1747 	if (mem_inuse != NULL) {
1748 		*mem_inuse = 0;
1749 	}
1750 
1751 	AR_LOCK(ar);
1752 	for (i = 0; i < SKMEM_REGIONS; i++) {
1753 		if (ar->ar_regions[i] == NULL) {
1754 			continue;
1755 		}
1756 
1757 		if (mem_total != NULL) {
1758 			*mem_total += AR_MEM_TOTAL(ar, i);
1759 		}
1760 		if (mem_inuse != NULL) {
1761 			*mem_inuse += AR_MEM_INUSE(ar, i);
1762 		}
1763 	}
1764 	AR_UNLOCK(ar);
1765 }
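
/*
 * Illustrative: either output pointer may be NULL when the caller
 * needs only one of the two figures, e.g.:
 *
 *	uint64_t inuse;
 *
 *	skmem_arena_get_stats(ar, NULL, &inuse);
 */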
1766 
1767 /*
1768  * Retrieve the offset of a particular region (identified by its ID)
1769  * from the base of the arena.
1770  */
1771 mach_vm_offset_t
1772 skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
1773 {
1774 	mach_vm_offset_t offset = 0;
1775 	uint32_t i;
1776 
1777 	ASSERT(id < SKMEM_REGIONS);
1778 
1779 	AR_LOCK(ar);
1780 	for (i = 0; i < id; i++) {
1781 		if (ar->ar_regions[i] == NULL) {
1782 			continue;
1783 		}
1784 
1785 		offset += ar->ar_regions[i]->skr_size;
1786 	}
1787 	AR_UNLOCK(ar);
1788 
1789 	return offset;
1790 }
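
/*
 * Worked example (hypothetical sizes): if ar_regions[] is populated
 * only at indices 0, 2 and 5 with skr_size of 64 KB, 128 KB and
 * 32 KB respectively, the offset of region id 5 is
 * 64 KB + 128 KB = 192 KB; NULL slots contribute nothing.  This
 * mirrors the contiguous task mapping, in which regions appear in
 * ascending ID order.
 */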
1791 
1792 /*
1793  * Reap all of the configured caches in the arena, so that any excess amount
1794  * outside of their working sets gets released to their respective backing
1795  * regions.  If purging is specified, we empty the caches' working sets,
1796  * including everything that's cached at the CPU layer.
1797  */
1798 static void
1799 skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
1800 {
1801 	struct skmem_arena_nexus *arn;
1802 	struct skmem_arena_necp *arc;
1803 	struct kern_pbufpool *pp;
1804 
1805 	AR_LOCK_ASSERT_HELD(ar);
1806 
1807 	switch (ar->ar_type) {
1808 	case SKMEM_ARENA_TYPE_NEXUS:
1809 		arn = (struct skmem_arena_nexus *)ar;
1810 		if (arn->arn_schema_cache != NULL) {
1811 			skmem_cache_reap_now(arn->arn_schema_cache, purge);
1812 		}
1813 		if (arn->arn_ring_cache != NULL) {
1814 			skmem_cache_reap_now(arn->arn_ring_cache, purge);
1815 		}
1816 		if ((pp = arn->arn_rx_pp) != NULL) {
1817 			if (pp->pp_kmd_cache != NULL) {
1818 				skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1819 			}
1820 			if (pp->pp_buf_cache != NULL) {
1821 				skmem_cache_reap_now(pp->pp_buf_cache, purge);
1822 			}
1823 			if (pp->pp_kbft_cache != NULL) {
1824 				skmem_cache_reap_now(pp->pp_kbft_cache, purge);
1825 			}
1826 		}
1827 		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
1828 			if (pp->pp_kmd_cache != NULL) {
1829 				skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1830 			}
1831 			if (pp->pp_buf_cache != NULL) {
1832 				skmem_cache_reap_now(pp->pp_buf_cache, purge);
1833 			}
1834 			if (pp->pp_kbft_cache != NULL) {
1835 				skmem_cache_reap_now(pp->pp_kbft_cache, purge);
1836 			}
1837 		}
1838 		break;
1839 
1840 	case SKMEM_ARENA_TYPE_NECP:
1841 		arc = (struct skmem_arena_necp *)ar;
1842 		if (arc->arc_kstats_cache != NULL) {
1843 			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
1844 		}
1845 		break;
1846 
1847 	case SKMEM_ARENA_TYPE_SYSTEM:
1848 		break;
1849 	}
1850 }
1851 
1852 void
1853 skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
1854 {
1855 	AR_LOCK(ar);
1856 	skmem_arena_reap_locked(ar, purge);
1857 	AR_UNLOCK(ar);
1858 }
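
/*
 * Illustrative: a memory-pressure path would typically trim only the
 * excess beyond the working sets,
 *
 *	skmem_arena_reap(ar, FALSE);
 *
 * while teardown uses purge=TRUE (as skmem_arena_defunct() does
 * above) to also empty the working sets and the CPU-layer caches.
 */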
1859 
1860 #if SK_LOG
1861 SK_LOG_ATTRIBUTE
1862 static void
1863 skmem_arena_create_region_log(struct skmem_arena *ar)
1864 {
1865 	char label[32];
1866 	int i;
1867 
1868 	switch (ar->ar_type) {
1869 	case SKMEM_ARENA_TYPE_NEXUS:
1870 		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
1871 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
1872 		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
1873 		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
1874 		break;
1875 
1876 	case SKMEM_ARENA_TYPE_NECP:
1877 	case SKMEM_ARENA_TYPE_SYSTEM:
1878 		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
1879 		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1880 		break;
1881 	}
1882 
1883 	for (i = 0; i < SKMEM_REGIONS; i++) {
1884 		if (ar->ar_regions[i] == NULL) {
1885 			continue;
1886 		}
1887 
1888 		(void) snprintf(label, sizeof(label), "REGION_%s:",
1889 		    skmem_region_id2name(i));
1890 		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
1891 		    "o:[%4u x %6u -> %4u x %6u]", label,
1892 		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
1893 		    (uint32_t)AR_MEM_SEGCNT(ar, i),
1894 		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
1895 		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
1896 		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
1897 		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
1898 		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
1899 	}
1900 }
1901 #endif /* SK_LOG */
1902 
1903 static size_t
1904 skmem_arena_mib_get_stats(struct skmem_arena *ar, void *out, size_t len)
1905 {
1906 	size_t actual_space = sizeof(struct sk_stats_arena);
1907 	struct sk_stats_arena *sar = out;
1908 	struct skmem_arena_mmap_info *ami = NULL;
1909 	pid_t proc_pid;
1910 	int i;
1911 
1912 	if (out == NULL || len < actual_space) {
1913 		goto done;
1914 	}
1915 
1916 	AR_LOCK(ar);
1917 	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
1918 	    "%s", ar->ar_name);
1919 	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
1920 	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
1921 	i = 0;
1922 	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
1923 		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
1924 			struct kern_channel *ch;
1925 			ch = container_of(ami, struct kern_channel, ch_mmap);
1926 			proc_pid = ch->ch_pid;
1927 		} else {
1928 			ASSERT((ami->ami_arena->ar_type ==
1929 			    SKMEM_ARENA_TYPE_NECP) ||
1930 			    (ami->ami_arena->ar_type ==
1931 			    SKMEM_ARENA_TYPE_SYSTEM));
1932 			proc_pid =
1933 			    necp_client_get_proc_pid_from_arena_info(ami);
1934 		}
1935 		sar->sar_mapped_pids[i++] = proc_pid;
1936 		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
1937 			break;
1938 		}
1939 	}
1940 
1941 	for (i = 0; i < SKMEM_REGIONS; i++) {
1942 		struct skmem_region *skr = ar->ar_regions[i];
1943 		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];
1944 
1945 		if (skr == NULL) {
1946 			uuid_clear(*sreg_uuid);
1947 			continue;
1948 		}
1949 
1950 		uuid_copy(*sreg_uuid, skr->skr_uuid);
1951 	}
1952 	AR_UNLOCK(ar);
1953 
1954 done:
1955 	return actual_space;
1956 }
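
/*
 * Note: skmem_arena_mib_get_stats() always returns the space one
 * record requires, even when it copies nothing out, which lets the
 * handler below make a sizing pass with a NULL buffer:
 *
 *	size_t need = skmem_arena_mib_get_stats(ar, NULL, 0);
 */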
1957 
1958 static int
1959 skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
1960 {
1961 #pragma unused(arg1, arg2, oidp)
1962 	struct skmem_arena *ar;
1963 	size_t actual_space;
1964 	size_t buffer_space;
1965 	size_t allocated_space;
1966 	caddr_t buffer = NULL;
1967 	caddr_t scan;
1968 	int error = 0;
1969 
1970 	if (!kauth_cred_issuser(kauth_cred_get())) {
1971 		return EPERM;
1972 	}
1973 
1974 	net_update_uptime();
1975 	buffer_space = req->oldlen;
1976 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
1977 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
1978 			buffer_space = SK_SYSCTL_ALLOC_MAX;
1979 		}
1980 		allocated_space = buffer_space;
1981 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_arena_mib);
1982 		if (__improbable(buffer == NULL)) {
1983 			return ENOBUFS;
1984 		}
1985 	} else if (req->oldptr == USER_ADDR_NULL) {
1986 		buffer_space = 0;
1987 	}
1988 	actual_space = 0;
1989 	scan = buffer;
1990 
1991 	SKMEM_ARENA_LOCK();
1992 	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
1993 		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
1994 		if (scan != NULL) {
1995 			if (buffer_space < size) {
1996 				/* supplied buffer too small, stop copying */
1997 				error = ENOMEM;
1998 				break;
1999 			}
2000 			scan += size;
2001 			buffer_space -= size;
2002 		}
2003 		actual_space += size;
2004 	}
2005 	SKMEM_ARENA_UNLOCK();
2006 
2007 	if (actual_space != 0) {
2008 		int out_error = SYSCTL_OUT(req, buffer, actual_space);
2009 		if (out_error != 0) {
2010 			error = out_error;
2011 		}
2012 	}
2013 	if (buffer != NULL) {
2014 		sk_free_data(buffer, allocated_space);
2015 	}
2016 
2017 	return error;
2018 }
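
/*
 * Illustrative userspace consumer (sketch; the MIB name under which
 * this handler is registered is defined elsewhere, and
 * "kern.skywalk.stats.arena" is assumed here).  It follows the
 * standard two-call sysctl sizing pattern the handler supports:
 *
 *	size_t len = 0;
 *	sysctlbyname("kern.skywalk.stats.arena", NULL, &len, NULL, 0);
 *	struct sk_stats_arena *buf = malloc(len);
 *	sysctlbyname("kern.skywalk.stats.arena", buf, &len, NULL, 0);
 *	... buf now holds one sk_stats_arena record per arena ...
 */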
2019