/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * SKMEM_ARENA_TYPE_NEXUS:
 *
 *   This arena represents the memory subsystem of a nexus adapter.  It
 *   consists of a collection of memory regions that are usable by the nexus,
 *   as well as the various caches for objects in those regions.
 *
 *       (1 per nexus adapter)
 *     +=======================+
 *     |      skmem_arena      |
 *     +-----------------------+              (backing regions)
 *     |     ar_regions[0]     |           +=======================+
 *     :          ...          : ------->> |     skmem_region      |===+
 *     |     ar_regions[n]     |           +=======================+   |===+
 *     +=======================+               +=======================+   |
 *     |     arn_{caches,pp}   | ---+              +=======================+
 *     +-----------------------+    |
 *     |     arn_stats_obj     |    |
 *     |     arn_flowadv_obj   |    |         (cache frontends)
 *     |     arn_nexusadv_obj  |    |      +=======================+
 *     +-----------------------+    +--->> |     skmem_cache       |===+
 *                                         +=======================+   |===+
 *                                             +=======================+   |
 *                                                 +=======================+
 *
 *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
 *   may be external to the nexus adapter, e.g. created by the driver or an
 *   external entity.  If not supplied, we create these regions along with
 *   the packet buffer pool ourselves.  The rest of the regions (unrelated
 *   to the packet buffer pool) are unique to the arena and are allocated at
 *   arena creation time.
 *
 *   An arena may be mapped to a user task/process as many times as needed.
 *   The result of each mapping is a contiguous range within the address space
 *   of that task, indicated by the span [ami_mapaddr, ami_mapaddr +
 *   ami_mapsize).  This is achieved by leveraging the mapper memory object
 *   ar_mapper that "stitches" the disjoint segments together.  Only
 *   user-mappable regions, i.e. those marked with SKR_MODE_MMAPOK, will be
 *   included in this span.
 *
 *   Nexus adapters that are eligible for defunct will trigger the arena to
 *   undergo memory redirection for all regions except those that are marked
 *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
 *   to the adapter are defunct.  Upon completion, those redirected regions
 *   will be torn down in order to reduce their memory footprints.  When this
 *   happens the adapter and its arena are no longer active or in service.
 *
 *   The arena exposes caches for allocating and freeing most region objects.
 *   These slab-allocator based caches act as front-ends to the regions; only
 *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
 *   other ones simply utilize skmem_cache for slab-based allocations.
 *
 *   Certain regions contain singleton objects that are simple enough to not
 *   require the slab allocator, such as the ones used for statistics and flow
 *   advisories.  Because of this, we directly allocate from those regions
 *   and store the objects in the arena.
 *
 * SKMEM_ARENA_TYPE_NECP:
 *
 *   This arena represents the memory subsystem of an NECP file descriptor
 *   object.  It consists of a memory region for per-flow statistics, as well
 *   as a cache front-end for that region.
 *
 * SKMEM_ARENA_SYSTEM:
 *
 *   This arena represents general, system-wide objects.  It currently
 *   consists of the sysctls region that's created once at init time.
 */
/* END CSTYLED */

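/*
 * Illustrative lifecycle sketch (compiled out; exposition only).  The
 * helper name example_arena_lifecycle is hypothetical and the srp[]
 * array is assumed to have been configured by the nexus code; the calls
 * themselves are the real entry points defined in this file.  A nexus
 * typically creates its arena once, maps it into the owning task, and
 * drops its reference when done:
 */
#if 0
static int
example_arena_lifecycle(struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct proc *p)
{
	struct skmem_arena_mmap_info ami;
	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
	struct skmem_arena *ar;
	int err;

	/* create the arena; a packet pool is created since none is given */
	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
	    FALSE, FALSE, NULL, &err);
	if (ar == NULL) {
		return err;
	}

	/* stitch all SKR_MODE_MMAPOK regions into the task's address space */
	bzero(&ami, sizeof(ami));
	err = skmem_arena_mmap(ar, p, &ami);
	if (err == 0) {
		/* [ami.ami_mapaddr, ami.ami_mapaddr + ami.ami_mapsize) is live */
		skmem_arena_munmap(ar, &ami);
	}

	/* drop the packet pool references returned to us above */
	pp_release(rx_pp);
	pp_release(tx_pp);

	/* may destroy the arena if this was the last reference */
	(void) skmem_arena_release(ar);
	return err;
}
#endif
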
#include <skywalk/os_skywalk_private.h>
#include <net/necp.h>

static void skmem_arena_destroy(struct skmem_arena *);
static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
static int skmem_arena_create_finalize(struct skmem_arena *);
static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
static struct skmem_arena *skmem_arena_alloc(skmem_arena_type_t,
    const char *);
static void skmem_arena_free(struct skmem_arena *);
static void skmem_arena_retain_locked(struct skmem_arena *);
static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
    struct skmem_arena_mmap_info *);
#if SK_LOG
static void skmem_arena_create_region_log(struct skmem_arena *);
#endif /* SK_LOG */
static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
    "Skywalk arena statistics");

static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);

static TAILQ_HEAD(, skmem_arena) skmem_arena_head;

#define SKMEM_ARENA_LOCK()                      \
	lck_mtx_lock(&skmem_arena_lock)
#define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKMEM_ARENA_UNLOCK()                    \
	lck_mtx_unlock(&skmem_arena_lock)

#define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
static ZONE_DECLARE(ar_nexus_zone, SKMEM_ZONE_PREFIX ".mem.arena.nexus",
    AR_NEXUS_SIZE, ZC_ZFREE_CLEARMEM);

#define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
static ZONE_DECLARE(ar_necp_zone, SKMEM_ZONE_PREFIX ".mem.arena.necp",
    AR_NECP_SIZE, ZC_ZFREE_CLEARMEM);

#define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
static ZONE_DECLARE(ar_system_zone, SKMEM_ZONE_PREFIX ".mem.arena.system",
    AR_SYSTEM_SIZE, ZC_ZFREE_CLEARMEM);

#define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
static kern_allocation_name_t skmem_tag_arena_mib;

void
skmem_arena_init(void)
{
	_CASSERT(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
	_CASSERT(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
	_CASSERT(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);

	TAILQ_INIT(&skmem_arena_head);

	ASSERT(skmem_tag_arena_mib == NULL);
	skmem_tag_arena_mib =
	    kern_allocation_name_allocate(SKMEM_TAG_ARENA_MIB, 0);
	ASSERT(skmem_tag_arena_mib != NULL);
}

void
skmem_arena_fini(void)
{
	if (skmem_tag_arena_mib != NULL) {
		kern_allocation_name_release(skmem_tag_arena_mib);
		skmem_tag_arena_mib = NULL;
	}
}

SK_NO_INLINE_ATTRIBUTE
static int
skmem_arena_sd_setup(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
    boolean_t kernel_only, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
	const char *name = na->na_name;
	char *fmt, cname[64];
	skmem_region_id_t usd_type, ksd_type;
	int err = 0;

	if (tx) {
		usd_type = SKMEM_REGION_TXAUSD;
		ksd_type = SKMEM_REGION_TXAKSD;
		cachep = &arn->arn_txaksd_cache;
		fmt = "txa_ksd.%s";
	} else {
		usd_type = SKMEM_REGION_RXFUSD;
		ksd_type = SKMEM_REGION_RXFKSD;
		cachep = &arn->arn_rxfksd_cache;
		fmt = "rxf_ksd.%s";
	}
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	if (ksd_skr == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
		    "create %s region", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
		err = ENOMEM;
		goto failed;
	}
	ar->ar_regions[ksd_type] = ksd_skr;
	if (!kernel_only) {
		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
		    NULL, NULL);
		if (usd_skr == NULL) {
			err = ENOMEM;
			goto failed;
		}
		ar->ar_regions[usd_type] = usd_skr;
		skmem_region_mirror(ksd_skr, usd_skr);
	}
	(void) snprintf(cname, sizeof(cname), fmt, name);
	ASSERT(ar->ar_regions[ksd_type] != NULL);
	*cachep = skmem_cache_create(cname,
	    srp[ksd_type].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[ksd_type], SKMEM_CR_NOMAGAZINES);
	if (*cachep == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		err = ENOMEM;
		goto failed;
	}
	return 0;

failed:
	if (ksd_skr != NULL) {
		skmem_region_release(ksd_skr);
		ar->ar_regions[ksd_type] = NULL;
	}
	if (usd_skr != NULL) {
		/*
		 * Decrements the refcnt incremented by skmem_region_mirror().
		 * This is not needed in case skmem_cache_create() succeeds,
		 * because skmem_cache_destroy() does the release.
		 */
		skmem_region_release(usd_skr);

		/* decrements the region's own refcnt */
		skmem_region_release(usd_skr);
		ar->ar_regions[usd_type] = NULL;
	}
	return err;
}

SK_NO_INLINE_ATTRIBUTE
static void
skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region **ksd_rp, **usd_rp;

	if (tx) {
		cachep = &arn->arn_txaksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
	} else {
		cachep = &arn->arn_rxfksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
	}
	if (*cachep != NULL) {
		skmem_cache_destroy(*cachep);
		*cachep = NULL;
	}
	if (*usd_rp != NULL) {
		skmem_region_release(*usd_rp);
		*usd_rp = NULL;
	}
	if (*ksd_rp != NULL) {
		skmem_region_release(*ksd_rp);
		*ksd_rp = NULL;
	}
}

static bool
skmem_arena_pp_setup(struct skmem_arena *ar,
    struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
    struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
    boolean_t kernel_only, boolean_t pp_truncated_buf)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;

	if (rx_pp == NULL && tx_pp == NULL) {
		uint32_t ppcreatef = 0;
		if (pp_truncated_buf) {
			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
		}
		if (kernel_only) {
			ppcreatef |= PPCREATEF_KERNEL_ONLY;
		}
		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
		}
		/* callee retains pp upon success */
		rx_pp = pp_create(name, &srp[SKMEM_REGION_BUF],
		    &srp[SKMEM_REGION_KMD], &srp[SKMEM_REGION_UMD],
		    &srp[SKMEM_REGION_KBFT], &srp[SKMEM_REGION_UBFT], NULL,
		    NULL, NULL, NULL, NULL, ppcreatef);
		if (rx_pp == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
			return false;
		}
		pp_retain(rx_pp);
		tx_pp = rx_pp;
	} else {
		if (rx_pp == NULL) {
			rx_pp = tx_pp;
		} else if (tx_pp == NULL) {
			tx_pp = rx_pp;
		}

		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
		ASSERT(!(!kernel_only &&
		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
		pp_retain(rx_pp);
		pp_retain(tx_pp);
	}

	arn->arn_rx_pp = rx_pp;
	arn->arn_tx_pp = tx_pp;
	if (rx_pp == tx_pp) {
		skmem_region_retain(rx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_BUF] = rx_pp->pp_buf_region;
		ar->ar_regions[SKMEM_REGION_RXBUF] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_KBFT] =
			    rx_pp->pp_kbft_region;
		}
		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
	} else {
		ASSERT(kernel_only); /* split userspace pools not supported */
		ar->ar_regions[SKMEM_REGION_BUF] = NULL;
		skmem_region_retain(rx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_RXBUF] = rx_pp->pp_buf_region;
		skmem_region_retain(tx_pp->pp_buf_region);
		ar->ar_regions[SKMEM_REGION_TXBUF] = tx_pp->pp_buf_region;
		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
		skmem_region_retain(tx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_RXKBFT] =
			    rx_pp->pp_kbft_region;
		}
		if (tx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
			skmem_region_retain(tx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_TXKBFT] =
			    tx_pp->pp_kbft_region;
		}
	}

	if (kernel_only) {
		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
			ASSERT(PP_KERNEL_ONLY(rx_pp));
			ASSERT(PP_KERNEL_ONLY(tx_pp));
			ASSERT(rx_pp->pp_umd_region == NULL);
			ASSERT(tx_pp->pp_umd_region == NULL);
			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(rx_pp->pp_ubft_region == NULL);
			ASSERT(tx_pp->pp_ubft_region == NULL);
			if (rx_pp->pp_kbft_region != NULL) {
				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
			if (tx_pp->pp_kbft_region != NULL) {
				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
		}
	} else {
		ASSERT(rx_pp == tx_pp);
		ASSERT(!PP_KERNEL_ONLY(rx_pp));
		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
		skmem_region_retain(rx_pp->pp_umd_region);
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
			ASSERT(rx_pp->pp_ubft_region != NULL);
			ASSERT(rx_pp->pp_ubft_region->skr_mode &
			    SKR_MODE_MIRRORED);
			ar->ar_regions[SKMEM_REGION_UBFT] =
			    rx_pp->pp_ubft_region;
			skmem_region_retain(rx_pp->pp_ubft_region);
		}
	}

	arn->arn_md_type = rx_pp->pp_md_type;
	arn->arn_md_subtype = rx_pp->pp_md_subtype;
	return true;
}

/*
 * Create a nexus adapter arena.
 */
struct skmem_arena *
skmem_arena_create_for_nexus(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
    struct kern_pbufpool **rx_pp, boolean_t pp_truncated_buf,
    boolean_t kernel_only, struct kern_nexus_advisory *nxv, int *perr)
{
#define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
	struct skmem_arena_nexus *arn;
	struct skmem_arena *ar;
	char cname[64];
	uint32_t i;
	const char *name = na->na_name;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NEXUS, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);
	arn = (struct skmem_arena_nexus *)ar;

	/* these regions must not be readable/writeable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);

	/* these regions must be read-only */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY);
		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY);
	} else {
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY));
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY));
	}

	/* these regions must be user-mappable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);

	/* these must not be user-mappable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));

	/* these regions must be shareable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF) & SKMEM_REGION_CR_SHAREOK);

	/* these regions must not be shareable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));

	/* these must stay active */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);

	/* no kstats for nexus */
	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);

	AR_LOCK(ar);
	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
	    (tx_pp ? *tx_pp : NULL), kernel_only, pp_truncated_buf)) {
		goto failed;
	}

	if (nxv != NULL && nxv->nxv_reg != NULL) {
		struct skmem_region *svr = nxv->nxv_reg;

		ASSERT(svr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(svr->skr_seg_max_cnt == 1);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = svr;
		skmem_region_retain(svr);

		ASSERT(nxv->nxv_adv != NULL);
		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
			VERIFY(nxv->netif_nxv_adv->nna_version ==
			    NX_NETIF_ADVISORY_CURRENT_VERSION);
		} else {
			panic_plain("%s: invalid advisory type %d",
			    __func__, nxv->nxv_adv_type);
			/* NOTREACHED */
		}
		arn->arn_nexusadv_obj = nxv->nxv_adv;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
		goto failed;
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
		goto failed;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		/* skip if already created */
		if (ar->ar_regions[i] != NULL) {
			continue;
		}

		/* skip external regions from packet pool */
		if (skmem_region_for_pp(i)) {
			continue;
		}

		/* skip slot descriptor regions */
		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
			continue;
		}

		/* skip if region is configured to be empty */
		if (srp[i].srp_c_obj_cnt == 0) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		ASSERT(srp[i].srp_id == i);

		/*
		 * Skip {SCHEMA, RING, GUARD} for a kernel-only arena.  Note
		 * that this assumes a kernel-only arena is always used
		 * for kernel-only nexus adapters (never used directly by
		 * a user process.)
		 *
		 * XXX [email protected] - see comments in kern_pbufpool_create().
		 * We need to revisit this logic for "direct channel" access,
		 * perhaps via a separate adapter flag.
		 */
		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
		    i == SKMEM_REGION_GUARD_TAIL)) {
			continue;
		}

		/* not for nexus, or for us to create here */
		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
		ASSERT(i != SKMEM_REGION_NEXUSADV);
		ASSERT(i != SKMEM_REGION_SYSCTLS);
		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
		ASSERT(i != SKMEM_REGION_KSTATS);
		ASSERT(i != SKMEM_REGION_INTRINSIC);

		/* otherwise create it */
		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
			    "create %s region", ar->ar_name, SK_KVA(ar),
			    ar->ar_flags, ARF_BITS, srp[i].srp_name);
			goto failed;
		}
	}

	/* create skmem_cache for schema (without magazines) */
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
		(void) snprintf(cname, sizeof(cname), "schema.%s", name);
		if ((arn->arn_schema_cache = skmem_cache_create(cname,
		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL, NULL,
		    NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
		    SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
			    cname);
			goto failed;
		}
	}

	/* create skmem_cache for rings (without magazines) */
	(void) snprintf(cname, sizeof(cname), "ring.%s", name);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
	    (arn->arn_ring_cache = skmem_cache_create(cname,
	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_RING], SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	/*
	 * If the stats region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
		struct skmem_region *str = ar->ar_regions[SKMEM_REGION_USTATS];

		/* no kstats for nexus */
		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_stats_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);

	/*
	 * If the flowadv region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
		struct skmem_region *str =
		    ar->ar_regions[SKMEM_REGION_FLOWADV];

		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_flowadv_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

	/* caller didn't give us one, but would like us to return it? */
	if (rx_pp != NULL && *rx_pp == NULL) {
		*rx_pp = arn->arn_rx_pp;
		pp_retain(*rx_pp);
	}
	if (tx_pp != NULL && *tx_pp == NULL) {
		*tx_pp = arn->arn_tx_pp;
		pp_retain(*tx_pp);  /* for caller */
	}

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
#undef SRP_CFLAGS
}

void
skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
{
	struct skmem_arena *ar = &arn->arn_cmn;

	AR_LOCK(ar);
	arn->arn_ksd_nodefunct += cnt;
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	AR_UNLOCK(ar);
}

boolean_t
skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	boolean_t idle;

	AR_LOCK(ar);
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	idle = (arn->arn_ksd_nodefunct == 0);
	AR_UNLOCK(ar);

	return idle;
}

static void
skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	struct skmem_region *skr;
	int i;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* these should never be set for nexus arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	if (arn->arn_stats_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_stats_obj, NULL);
		arn->arn_stats_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(arn->arn_stats_obj == NULL);

	if (arn->arn_flowadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_flowadv_obj, NULL);
		arn->arn_flowadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(arn->arn_flowadv_obj == NULL);

	if (arn->arn_nexusadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so just nullify it */
		arn->arn_nexusadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(arn->arn_nexusadv_obj == NULL);

	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
	if (arn->arn_rx_pp != NULL) {
		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
			skmem_region_id_t reg = skmem_pp_region_ids[i];
			skr = ar->ar_regions[reg];
			if (skr != NULL) {
				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
				skmem_region_release(skr);
				ar->ar_regions[reg] = NULL;
			}
		}
		pp_release(arn->arn_rx_pp);
		pp_release(arn->arn_tx_pp);
		arn->arn_rx_pp = NULL;
		arn->arn_tx_pp = NULL;
	}
	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}
	ASSERT(arn->arn_rx_pp == NULL);
	ASSERT(arn->arn_tx_pp == NULL);

	if (arn->arn_ring_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_RING];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_ring_cache);
		arn->arn_ring_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_RING] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(arn->arn_ring_cache == NULL);

	/*
	 * Stop here if we're in the defunct context, and we're asked
	 * to keep the slot descriptor regions alive as they are still
	 * being referred to by the nexus owner (driver).
	 */
	if (defunct && arn->arn_ksd_nodefunct != 0) {
		ASSERT(arn->arn_ksd_nodefunct > 0);
		return;
	}

	ASSERT(arn->arn_ksd_nodefunct == 0);
	skmem_arena_sd_teardown(ar, TRUE);
	skmem_arena_sd_teardown(ar, FALSE);

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	if (arn->arn_schema_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_schema_cache);
		arn->arn_schema_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(arn->arn_schema_cache == NULL);

	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
}

/*
 * Create an NECP arena.
 */
struct skmem_arena *
skmem_arena_create_for_necp(const char *name,
    struct skmem_region_params *srp_ustats,
    struct skmem_region_params *srp_kstats, int *perr)
{
	struct skmem_arena_necp *arc;
	struct skmem_arena *ar;
	char cname[64];

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NECP, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);
	arc = (struct skmem_arena_necp *)ar;

	/*
	 * Must be stats region, and must be user-mappable;
	 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
	 * as the client might want multi-segment mode.
	 */
	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(srp_ustats->srp_c_obj_size != 0);
	ASSERT(srp_kstats->srp_c_obj_size != 0);
	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);

	AR_LOCK(ar);

	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
	    srp_ustats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_ustats->srp_name);
		goto failed;
	}

	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
	    srp_kstats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_kstats->srp_name);
		goto failed;
	}

	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
	    ar->ar_regions[SKMEM_REGION_USTATS]);

	/* create skmem_cache for kernel stats (without magazines) */
	(void) snprintf(cname, sizeof(cname), "kstats.%s", name);
	if ((arc->arc_kstats_cache = skmem_cache_create(cname,
	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_KSTATS],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for NECP arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
{
#pragma unused(defunct)
	struct skmem_arena *ar = &arc->arc_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);

	/* these must never be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	if (arc->arc_kstats_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arc->arc_kstats_cache);
		arc->arc_kstats_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;

		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(arc->arc_kstats_cache == NULL);
}

/*
 * Given an arena, return its NECP variant (if applicable).
 */
struct skmem_arena_necp *
skmem_arena_necp(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
		return NULL;
	}

	return (struct skmem_arena_necp *)ar;
}

/*
 * Create a System arena.
 */
struct skmem_arena *
skmem_arena_create_for_system(const char *name, int *perr)
{
	struct skmem_region *skrsys;
	struct skmem_arena_system *ars;
	struct skmem_arena *ar;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_SYSTEM, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);
	ars = (struct skmem_arena_system *)ar;

	AR_LOCK(ar);
	/* retain system-wide sysctls region */
	skrsys = skmem_get_sysctls_region();
	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
	    SKR_MODE_SHAREOK)) ==
	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
	    SKR_MODE_MONOLITHIC));
	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
	skmem_region_retain(skrsys);

	/* object is valid as long as the sysctls region is retained */
	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
	ASSERT(ars->ars_sysctls_obj != NULL);
	ASSERT(ars->ars_sysctls_objsize != 0);

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for system arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
{
	struct skmem_arena *ar = &ars->ars_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);

	/* these must never be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* nothing to do here for now during defunct, just return */
	if (defunct) {
		return;
	}

	if (ars->ars_sysctls_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so don't free it */
		ars->ars_sysctls_obj = NULL;
		ars->ars_sysctls_objsize = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ars->ars_sysctls_obj == NULL);
	ASSERT(ars->ars_sysctls_objsize == 0);
}

/*
 * Given an arena, return its System variant (if applicable).
 */
struct skmem_arena_system *
skmem_arena_system(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
		return NULL;
	}

	return (struct skmem_arena_system *)ar;
}

void *
skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_obj;
}

size_t
skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_objsize;
}

/*
 * Destroy an arena.
 */
static void
skmem_arena_destroy(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);

	ASSERT(ar->ar_refcnt == 0);
	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
		AR_UNLOCK(ar);
		SKMEM_ARENA_LOCK();
		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
		SKMEM_ARENA_UNLOCK();
		AR_LOCK(ar);
		ASSERT(ar->ar_refcnt == 0);
	}

	/* teardown all remaining memory regions and associated resources */
	skmem_arena_teardown(ar, FALSE);

	if (ar->ar_ar != NULL) {
		IOSKArenaDestroy(ar->ar_ar);
		ar->ar_ar = NULL;
	}

	if (ar->ar_flags & ARF_ACTIVE) {
		ar->ar_flags &= ~ARF_ACTIVE;
	}

	AR_UNLOCK(ar);

	skmem_arena_free(ar);
}

/*
 * Tear down (or defunct) an arena.
 */
static void
skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
{
	uint32_t i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
		    defunct);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	/* take care of any remaining ones */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		skmem_region_release(ar->ar_regions[i]);
		ar->ar_regions[i] = NULL;
	}
}

static int
skmem_arena_create_finalize(struct skmem_arena *ar)
{
	IOSKRegionRef reg[SKMEM_REGIONS];
	uint32_t i, regcnt = 0;
	int err = 0;

	AR_LOCK_ASSERT_HELD(ar);

	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	/*
	 * Prepare an array of regions that can be mapped to user task;
	 * exclude regions that aren't eligible for user task mapping.
	 */
	bzero(&reg, sizeof(reg));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
			continue;
		}

		ASSERT(skr->skr_reg != NULL);
		reg[regcnt++] = skr->skr_reg;
	}
	ASSERT(regcnt != 0);

	/*
	 * Create backing IOSKArena handle.
	 */
	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
	if (ar->ar_ar == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
		    "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, regcnt);
		err = ENOMEM;
		goto failed;
	}

	ar->ar_flags |= ARF_ACTIVE;

failed:
	return err;
}

static struct skmem_arena *
skmem_arena_alloc(skmem_arena_type_t type, const char *name)
{
	const char *ar_str = NULL;
	struct zone *ar_zone = NULL;
	struct skmem_arena *ar;
	size_t ar_zsize = 0;

	switch (type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		ar_zone = ar_nexus_zone;
		ar_zsize = AR_NEXUS_SIZE;
		ar_str = "nexus";
		break;

	case SKMEM_ARENA_TYPE_NECP:
		ar_zone = ar_necp_zone;
		ar_zsize = AR_NECP_SIZE;
		ar_str = "necp";
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		ar_zone = ar_system_zone;
		ar_zsize = AR_SYSTEM_SIZE;
		ar_str = "system";
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	ar = zalloc_flags(ar_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar->ar_type = type;
	ar->ar_zsize = ar_zsize;
	ar->ar_zone = ar_zone;

	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
	    LCK_ATTR_NULL);
	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);

	return ar;
}

static void
skmem_arena_free(struct skmem_arena *ar)
{
#if DEBUG || DEVELOPMENT
	ASSERT(ar->ar_refcnt == 0);
	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
	ASSERT(ar->ar_ar == NULL);
	ASSERT(ar->ar_mapcnt == 0);
	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
		ASSERT(ar->ar_regions[i] == NULL);
	}
#endif /* DEBUG || DEVELOPMENT */

	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
	zfree(ar->ar_zone, ar);
}

/*
 * Retain an arena.
 */
__attribute__((always_inline))
static inline void
skmem_arena_retain_locked(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);
	ar->ar_refcnt++;
	ASSERT(ar->ar_refcnt != 0);
}

void
skmem_arena_retain(struct skmem_arena *ar)
{
	AR_LOCK(ar);
	skmem_arena_retain_locked(ar);
	AR_UNLOCK(ar);
}

/*
 * Release (and potentially destroy) an arena.
 */
__attribute__((always_inline))
static inline boolean_t
skmem_arena_release_locked(struct skmem_arena *ar)
{
	boolean_t lastref = FALSE;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_refcnt != 0);
	if (--ar->ar_refcnt == 0) {
		skmem_arena_destroy(ar);
		lastref = TRUE;
	} else {
		lastref = FALSE;
	}

	return lastref;
}

boolean_t
skmem_arena_release(struct skmem_arena *ar)
{
	boolean_t lastref;

	AR_LOCK(ar);
	/* unlock only if this isn't the last reference */
	if (!(lastref = skmem_arena_release_locked(ar))) {
		AR_UNLOCK(ar);
	}

	return lastref;
}

1484 /*
1485  * Map an arena to the task's address space.
1486  */
1487 int
skmem_arena_mmap(struct skmem_arena * ar,struct proc * p,struct skmem_arena_mmap_info * ami)1488 skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1489     struct skmem_arena_mmap_info *ami)
1490 {
1491 	task_t task = proc_task(p);
1492 	IOReturn ioerr;
1493 	int err = 0;
1494 
1495 	ASSERT(task != kernel_task && task != TASK_NULL);
1496 	ASSERT(ami->ami_arena == NULL);
1497 	ASSERT(ami->ami_mapref == NULL);
1498 	ASSERT(ami->ami_maptask == TASK_NULL);
1499 	ASSERT(!ami->ami_redirect);
1500 
1501 	AR_LOCK(ar);
1502 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1503 		err = ENODEV;
1504 		goto failed;
1505 	}
1506 
1507 	ASSERT(ar->ar_ar != NULL);
1508 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1509 		err = ENOMEM;
1510 		goto failed;
1511 	}
1512 
1513 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1514 	    &ami->ami_mapsize);
1515 	VERIFY(ioerr == kIOReturnSuccess);
1516 
1517 	ami->ami_arena = ar;
1518 	skmem_arena_retain_locked(ar);
1519 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1520 
1521 	ami->ami_maptask = task;
1522 	ar->ar_mapcnt++;
1523 	if (ar->ar_mapcnt == 1) {
1524 		ar->ar_mapsize = ami->ami_mapsize;
1525 	}
1526 
1527 	ASSERT(ami->ami_mapref != NULL);
1528 	ASSERT(ami->ami_arena == ar);
1529 	AR_UNLOCK(ar);
1530 
1531 	return 0;
1532 
1533 failed:
1534 	AR_UNLOCK(ar);
1535 	skmem_arena_munmap(ar, ami);
1536 	VERIFY(err != 0);
1537 
1538 	return err;
1539 }
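/*
 * Sketch of typical usage: map an active arena into a process, then
 * tear the mapping down.  On failure skmem_arena_mmap() has already
 * cleaned up after itself via skmem_arena_munmap(), so only the
 * success path needs an explicit unmap.
 *
 *	struct skmem_arena_mmap_info ami;
 *	bzero(&ami, sizeof(ami));
 *	if ((err = skmem_arena_mmap(ar, p, &ami)) == 0) {
 *		... [ami.ami_mapaddr, ami.ami_mapaddr + ami.ami_mapsize)
 *		... is now a contiguous span in the task's address space
 *		skmem_arena_munmap(ar, &ami);
 *	}
 */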
1540 
1541 /*
1542  * Remove arena's memory mapping from task's address space (common code).
1543  * Returns true if caller needs to perform a deferred defunct.
1544  */
1545 static boolean_t
1546 skmem_arena_munmap_common(struct skmem_arena *ar,
1547     struct skmem_arena_mmap_info *ami)
1548 {
1549 	boolean_t need_defunct = FALSE;
1550 
1551 	AR_LOCK(ar);
1552 	if (ami->ami_mapref != NULL) {
1553 		IOSKMapperDestroy(ami->ami_mapref);
1554 		ami->ami_mapref = NULL;
1555 
1556 		VERIFY(ar->ar_mapcnt != 0);
1557 		ar->ar_mapcnt--;
1558 		if (ar->ar_mapcnt == 0) {
1559 			ar->ar_mapsize = 0;
1560 		}
1561 
1562 		VERIFY(ami->ami_arena == ar);
1563 		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1564 		    ami_link);
1565 
1566 		/*
1567 		 * We expect the caller to hold an extra reference on the
1568 		 * arena, in addition to the one held by this mmap_info.
1569 		 */
1570 		VERIFY(ar->ar_refcnt > 1);
1571 		(void) skmem_arena_release_locked(ar);
1572 		ami->ami_arena = NULL;
1573 
1574 		if (ami->ami_redirect) {
1575 			/*
1576 			 * This mapper has been redirected; decrement
1577 			 * the redirect count associated with it.
1578 			 */
1579 			VERIFY(ar->ar_maprdrcnt != 0);
1580 			ar->ar_maprdrcnt--;
1581 		} else if (ar->ar_maprdrcnt != 0 &&
1582 		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
1583 			 * There are other mappers for this arena that have
1584 			 * The are other mappers for this arena that have
1585 			 * all been redirected, but the arena wasn't marked
1586 			 * inactive by skmem_arena_redirect() last time since
1587 			 * this particular mapper that we just destroyed
1588 			 * was using it.  Now that it's gone, finish the
1589 			 * postponed work below once we return to caller.
1590 			 */
1591 			ASSERT(ar->ar_flags & ARF_ACTIVE);
1592 			ar->ar_flags &= ~ARF_ACTIVE;
1593 			need_defunct = TRUE;
1594 		}
1595 	}
1596 	ASSERT(ami->ami_mapref == NULL);
1597 	ASSERT(ami->ami_arena == NULL);
1598 
1599 	ami->ami_maptask = TASK_NULL;
1600 	ami->ami_mapaddr = 0;
1601 	ami->ami_mapsize = 0;
1602 	ami->ami_redirect = FALSE;
1603 
1604 	AR_UNLOCK(ar);
1605 
1606 	return need_defunct;
1607 }
1608 
1609 /*
1610  * Remove arena's memory mapping from task's address space (channel version).
1611  * Will perform a deferred defunct if needed.
1612  */
1613 void
1614 skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1615 {
1616 	SK_LOCK_ASSERT_HELD();
1617 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1618 
1619 	 * If this is on a channel that was holding the last
1620 	 * active reference count on the arena, and there are
1621 	 * active reference count on the arena, and that there are
1622 	 * other defunct channels pointing to that arena, perform the
1623 	 * actual arena defunct now.
1624 	 */
1625 	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1626 		struct kern_nexus *nx = ch->ch_nexus;
1627 		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1628 
1629 		/*
1630 		 * Similar to kern_channel_defunct(), where we let the
1631 		 * domain provider complete the defunct.  At this point
1632 		 * both sk_lock and the channel locks are held, and so
1633 		 * we indicate that to the callee.
1634 		 */
1635 		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1636 		    nx, ch, TRUE);
1637 	}
1638 }
1639 
1640 /*
1641  * Remove arena's memory mapping from task's address space (generic).
1642  * This routine should only be called on non-channel related arenas.
1643  */
1644 void
1645 skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
1646 {
1647 	(void) skmem_arena_munmap_common(ar, ami);
1648 }
1649 
1650 /*
1651  * Redirect eligible memory regions in the task's memory map so that
1652  * they get overwritten and backed with anonymous (zero-filled) pages.
1653  */
1654 int
1655 skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
1656     struct proc *p, boolean_t *need_defunct)
1657 {
1658 #pragma unused(p)
1659 	int err = 0;
1660 
1661 	*need_defunct = FALSE;
1662 
1663 	AR_LOCK(ar);
1664 	ASSERT(ar->ar_ar != NULL);
1665 	if (ami->ami_redirect) {
1666 		err = EALREADY;
1667 	} else if (ami->ami_mapref == NULL) {
1668 		err = ENXIO;
1669 	} else {
1670 		VERIFY(ar->ar_mapcnt != 0);
1671 		ASSERT(ar->ar_flags & ARF_ACTIVE);
1672 		VERIFY(ami->ami_arena == ar);
1673 		/*
1674 		 * This effectively overwrites the mappings for all
1675 		 * redirectable memory regions (i.e. those without the
1676 		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
1677 		 * protection flags.  Accesses to these regions will be
1678 		 * redirected to anonymous, zero-filled pages.
1679 		 */
1680 		IOSKMapperRedirect(ami->ami_mapref);
1681 		ami->ami_redirect = TRUE;
1682 
1683 		/*
1684 		 * Mark the arena as inactive if all mapper instances are
1685 		 * redirected; otherwise, we do this later during unmap.
1686 		 * Once inactive, the arena will not allow further mmap,
1687 		 * and it is ready to be defunct later.
1688 		 */
1689 		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
1690 			ar->ar_flags &= ~ARF_ACTIVE;
1691 			*need_defunct = TRUE;
1692 		}
1693 	}
1694 	AR_UNLOCK(ar);
1695 
1696 	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
1697 	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
1698 	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
1699 	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
1700 	    *need_defunct, err);
1701 
1702 	return err;
1703 }
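/*
 * Sketch of typical usage: redirect a mapper and, if this was the last
 * non-redirected mapping, complete the now-pending defunct.
 *
 *	boolean_t need_defunct;
 *	if (skmem_arena_mredirect(ar, ami, p, &need_defunct) == 0 &&
 *	    need_defunct) {
 *		... ARF_ACTIVE is now clear; the arena is ready to be
 *		... defuncted, e.g. via skmem_arena_defunct() below
 *	}
 */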
1704 
1705 /*
1706  * Defunct an arena.
1707  */
1708 int
1709 skmem_arena_defunct(struct skmem_arena *ar)
1710 {
1711 	AR_LOCK(ar);
1712 
1713 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b", ar->ar_name,
1714 	    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1715 
1716 	if (ar->ar_flags & ARF_DEFUNCT) {
1717 		AR_UNLOCK(ar);
1718 		return EALREADY;
1719 	} else if (ar->ar_flags & ARF_ACTIVE) {
1720 		AR_UNLOCK(ar);
1721 		return EBUSY;
1722 	}
1723 
1724 	/* purge the caches now */
1725 	skmem_arena_reap_locked(ar, TRUE);
1726 
1727 	/* teardown eligible memory regions and associated resources */
1728 	skmem_arena_teardown(ar, TRUE);
1729 
1730 	ar->ar_flags |= ARF_DEFUNCT;
1731 
1732 	AR_UNLOCK(ar);
1733 
1734 	return 0;
1735 }
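/*
 * Sketch of the expected calling sequence: defunct only succeeds once
 * the arena has been deactivated, i.e. all of its mappers have been
 * redirected and/or unmapped.
 *
 *	int err = skmem_arena_defunct(ar);
 *	... err == 0         caches purged, eligible regions torn down
 *	... err == EBUSY     still ARF_ACTIVE; redirect or unmap first
 *	... err == EALREADY  already defunct; nothing left to do
 */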
1736 
1737 /*
1738  * Retrieve total and in-use memory statistics of regions in the arena.
1739  */
1740 void
1741 skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
1742     uint64_t *mem_inuse)
1743 {
1744 	uint32_t i;
1745 
1746 	if (mem_total != NULL) {
1747 		*mem_total = 0;
1748 	}
1749 	if (mem_inuse != NULL) {
1750 		*mem_inuse = 0;
1751 	}
1752 
1753 	AR_LOCK(ar);
1754 	for (i = 0; i < SKMEM_REGIONS; i++) {
1755 		if (ar->ar_regions[i] == NULL) {
1756 			continue;
1757 		}
1758 
1759 		if (mem_total != NULL) {
1760 			*mem_total += AR_MEM_TOTAL(ar, i);
1761 		}
1762 		if (mem_inuse != NULL) {
1763 			*mem_inuse += AR_MEM_INUSE(ar, i);
1764 		}
1765 	}
1766 	AR_UNLOCK(ar);
1767 }
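/*
 * Sketch of typical usage: either output parameter may be NULL, e.g.
 * to fetch only the in-use byte count.
 *
 *	uint64_t total, inuse;
 *	skmem_arena_get_stats(ar, &total, &inuse);
 *	skmem_arena_get_stats(ar, NULL, &inuse);
 */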
1768 
1769 /*
1770  * Retrieve the offset of a particular region (identified by its ID)
1771  * from the base of the arena.
1772  */
1773 mach_vm_offset_t
1774 skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
1775 {
1776 	mach_vm_offset_t offset = 0;
1777 	uint32_t i;
1778 
1779 	ASSERT(id < SKMEM_REGIONS);
1780 
1781 	AR_LOCK(ar);
1782 	for (i = 0; i < id; i++) {
1783 		if (ar->ar_regions[i] == NULL) {
1784 			continue;
1785 		}
1786 
1787 		offset += ar->ar_regions[i]->skr_size;
1788 	}
1789 	AR_UNLOCK(ar);
1790 
1791 	return offset;
1792 }
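/*
 * Sketch of typical usage: because the regions are mapped back-to-back
 * in region-ID order (see the diagram at the top of this file), a
 * region's address in the task can be derived from the mapping base;
 * the region ID shown here is purely illustrative.
 *
 *	mach_vm_offset_t off =
 *	    skmem_arena_get_region_offset(ar, some_region_id);
 *	mach_vm_address_t uaddr = ami->ami_mapaddr + off;
 */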
1793 
1794 /*
1795  * Reap all of the configured caches in the arena, so that any excess amount
1796  * outside of their working sets gets released to their respective backing
1797  * regions.  If purging is specified, we empty the caches' working sets,
1798  * including everything that's cached at the CPU layer.
1799  */
1800 static void
1801 skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
1802 {
1803 	struct skmem_arena_nexus *arn;
1804 	struct skmem_arena_necp *arc;
1805 	struct kern_pbufpool *pp;
1806 
1807 	AR_LOCK_ASSERT_HELD(ar);
1808 
1809 	switch (ar->ar_type) {
1810 	case SKMEM_ARENA_TYPE_NEXUS:
1811 		arn = (struct skmem_arena_nexus *)ar;
1812 		if (arn->arn_schema_cache != NULL) {
1813 			skmem_cache_reap_now(arn->arn_schema_cache, purge);
1814 		}
1815 		if (arn->arn_ring_cache != NULL) {
1816 			skmem_cache_reap_now(arn->arn_ring_cache, purge);
1817 		}
1818 		if ((pp = arn->arn_rx_pp) != NULL) {
1819 			if (pp->pp_kmd_cache != NULL) {
1820 				skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1821 			}
1822 			if (pp->pp_buf_cache != NULL) {
1823 				skmem_cache_reap_now(pp->pp_buf_cache, purge);
1824 			}
1825 			if (pp->pp_kbft_cache != NULL) {
1826 				skmem_cache_reap_now(pp->pp_kbft_cache, purge);
1827 			}
1828 		}
1829 		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
1830 			if (pp->pp_kmd_cache != NULL) {
1831 				skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1832 			}
1833 			if (pp->pp_buf_cache != NULL) {
1834 				skmem_cache_reap_now(pp->pp_buf_cache, purge);
1835 			}
1836 			if (pp->pp_kbft_cache != NULL) {
1837 				skmem_cache_reap_now(pp->pp_kbft_cache, purge);
1838 			}
1839 		}
1840 		break;
1841 
1842 	case SKMEM_ARENA_TYPE_NECP:
1843 		arc = (struct skmem_arena_necp *)ar;
1844 		if (arc->arc_kstats_cache != NULL) {
1845 			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
1846 		}
1847 		break;
1848 
1849 	case SKMEM_ARENA_TYPE_SYSTEM:
1850 		break;
1851 	}
1852 }
1853 
1854 void
1855 skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
1856 {
1857 	AR_LOCK(ar);
1858 	skmem_arena_reap_locked(ar, purge);
1859 	AR_UNLOCK(ar);
1860 }
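/*
 * Sketch of typical usage: a non-purging reap only releases the excess
 * beyond each cache's working set, while a purging reap also empties
 * the working sets and CPU-layer caches (as done during defunct).
 *
 *	skmem_arena_reap(ar, FALSE);	... trim excess only
 *	skmem_arena_reap(ar, TRUE);	... purge everything cached
 */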
1861 
1862 #if SK_LOG
1863 SK_LOG_ATTRIBUTE
1864 static void
1865 skmem_arena_create_region_log(struct skmem_arena *ar)
1866 {
1867 	char label[32];
1868 	int i;
1869 
1870 	switch (ar->ar_type) {
1871 	case SKMEM_ARENA_TYPE_NEXUS:
1872 		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
1873 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
1874 		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
1875 		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
1876 		break;
1877 
1878 	case SKMEM_ARENA_TYPE_NECP:
1879 	case SKMEM_ARENA_TYPE_SYSTEM:
1880 		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
1881 		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1882 		break;
1883 	}
1884 
1885 	for (i = 0; i < SKMEM_REGIONS; i++) {
1886 		if (ar->ar_regions[i] == NULL) {
1887 			continue;
1888 		}
1889 
1890 		(void) snprintf(label, sizeof(label), "REGION_%s:",
1891 		    skmem_region_id2name(i));
1892 		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
1893 		    "o:[%4u x %6u -> %4u x %6u]", label,
1894 		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
1895 		    (uint32_t)AR_MEM_SEGCNT(ar, i),
1896 		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
1897 		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
1898 		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
1899 		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
1900 		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
1901 	}
1902 }
1903 #endif /* SK_LOG */
1904 
1905 static size_t
1906 skmem_arena_mib_get_stats(struct skmem_arena *ar, void *out, size_t len)
1907 {
1908 	size_t actual_space = sizeof(struct sk_stats_arena);
1909 	struct sk_stats_arena *sar = out;
1910 	struct skmem_arena_mmap_info *ami = NULL;
1911 	pid_t proc_pid;
1912 	int i;
1913 
1914 	if (out == NULL || len < actual_space) {
1915 		goto done;
1916 	}
1917 
1918 	AR_LOCK(ar);
1919 	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
1920 	    "%s", ar->ar_name);
1921 	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
1922 	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
1923 	i = 0;
1924 	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
1925 		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
1926 			struct kern_channel *ch;
1927 			ch = container_of(ami, struct kern_channel, ch_mmap);
1928 			proc_pid = ch->ch_pid;
1929 		} else {
1930 			ASSERT((ami->ami_arena->ar_type ==
1931 			    SKMEM_ARENA_TYPE_NECP) ||
1932 			    (ami->ami_arena->ar_type ==
1933 			    SKMEM_ARENA_TYPE_SYSTEM));
1934 			proc_pid =
1935 			    necp_client_get_proc_pid_from_arena_info(ami);
1936 		}
1937 		sar->sar_mapped_pids[i++] = proc_pid;
1938 		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
1939 			break;
1940 		}
1941 	}
1942 
1943 	for (i = 0; i < SKMEM_REGIONS; i++) {
1944 		struct skmem_region *skr = ar->ar_regions[i];
1945 		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];
1946 
1947 		if (skr == NULL) {
1948 			uuid_clear(*sreg_uuid);
1949 			continue;
1950 		}
1951 
1952 		uuid_copy(*sreg_uuid, skr->skr_uuid);
1953 	}
1954 	AR_UNLOCK(ar);
1955 
1956 done:
1957 	return actual_space;
1958 }
1959 
1960 static int
1961 skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
1962 {
1963 #pragma unused(arg1, arg2, oidp)
1964 	struct skmem_arena *ar;
1965 	size_t actual_space;
1966 	size_t buffer_space;
1967 	size_t allocated_space;
1968 	caddr_t buffer = NULL;
1969 	caddr_t scan;
1970 	int error = 0;
1971 
1972 	if (!kauth_cred_issuser(kauth_cred_get())) {
1973 		return EPERM;
1974 	}
1975 
1976 	net_update_uptime();
1977 	buffer_space = req->oldlen;
1978 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
1979 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
1980 			buffer_space = SK_SYSCTL_ALLOC_MAX;
1981 		}
1982 		allocated_space = buffer_space;
1983 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_arena_mib);
1984 		if (__improbable(buffer == NULL)) {
1985 			return ENOBUFS;
1986 		}
1987 	} else if (req->oldptr == USER_ADDR_NULL) {
1988 		buffer_space = 0;
1989 	}
1990 	actual_space = 0;
1991 	scan = buffer;
1992 
1993 	SKMEM_ARENA_LOCK();
1994 	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
1995 		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
1996 		if (scan != NULL) {
1997 			if (buffer_space < size) {
1998 				/* supplied buffer too small, stop copying */
1999 				error = ENOMEM;
2000 				break;
2001 			}
2002 			scan += size;
2003 			buffer_space -= size;
2004 		}
2005 		actual_space += size;
2006 	}
2007 	SKMEM_ARENA_UNLOCK();
2008 
2009 	if (actual_space != 0) {
2010 		int out_error = SYSCTL_OUT(req, buffer, actual_space);
2011 		if (out_error != 0) {
2012 			error = out_error;
2013 		}
2014 	}
2015 	if (buffer != NULL) {
2016 		sk_free_data(buffer, allocated_space);
2017 	}
2018 
2019 	return error;
2020 }
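/*
 * Sketch of a user-space consumer (the MIB name below is illustrative,
 * as the sysctl node is registered elsewhere): the handler supports the
 * usual two-call sizing pattern, returning the required length when the
 * old pointer is NULL, and requires root privileges.
 *
 *	size_t len = 0;
 *	if (sysctlbyname("kern.skywalk.stats.arena", NULL, &len,
 *	    NULL, 0) == 0 && len != 0) {
 *		struct sk_stats_arena *buf = malloc(len);
 *		if (buf != NULL && sysctlbyname("kern.skywalk.stats.arena",
 *		    buf, &len, NULL, 0) == 0) {
 *			... len / sizeof(*buf) entries to inspect ...
 *		}
 *		free(buf);
 *	}
 */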
2021