/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* BEGIN CSTYLED */
/*
 * SKMEM_ARENA_TYPE_NEXUS:
 *
 *   This arena represents the memory subsystem of a nexus adapter.  It
 *   consists of a collection of memory regions that are usable by the nexus,
 *   as well as the various caches for objects in those regions.
 *
 *       (1 per nexus adapter)
 *     +=======================+
 *     |      skmem_arena      |
 *     +-----------------------+              (backing regions)
 *     |     ar_regions[0]     |           +=======================+
 *     :          ...          : ------->> |     skmem_region      |===+
 *     |     ar_regions[n]     |           +=======================+   |===+
 *     +=======================+               +=======================+   |
 *     |     arn_{caches,pp}   | ---+              +=======================+
 *     +-----------------------+    |
 *     |     arn_stats_obj     |    |
 *     |     arn_flowadv_obj   |    |         (cache frontends)
 *     |     arn_nexusadv_obj  |    |      +=======================+
 *     +-----------------------+    +--->> |     skmem_cache       |===+
 *                                         +=======================+   |===+
 *                                             +=======================+   |
 *                                                 +=======================+
 *
 *   Three regions {umd,kmd,buf} are used for the packet buffer pool, which
 *   may be external to the nexus adapter, e.g. created by the driver or an
 *   external entity.  If not supplied, we create these regions along with
 *   the packet buffer pool ourselves.  The rest of the regions (unrelated
 *   to the packet buffer pool) are unique to the arena and are allocated at
 *   arena creation time.
 *
 *   An arena may be mapped to a user task/process as many times as needed.
 *   The result of each mapping is a contiguous range within the address space
 *   of that task, indicated by the [ami_mapaddr, ami_mapaddr + ami_mapsize)
 *   span.  This is achieved by leveraging the mapper memory object ar_mapper
 *   that "stitches" the disjoint segments together.  Only user-mappable
 *   regions, i.e. those marked with SKR_MODE_MMAPOK, will be included in
 *   this span.
 *
 *   Nexus adapters that are eligible for defunct will trigger the arena to
 *   undergo memory redirection for all regions except those that are marked
 *   with SKR_MODE_NOREDIRECT.  This happens when all of the channels opened
 *   to the adapter are defunct.  Upon completion, those redirected regions
 *   will be torn down in order to reduce their memory footprints.  When this
 *   happens, the adapter and its arena are no longer active or in service.
 *
 *   The arena exposes caches for allocating and freeing most region objects.
 *   These slab-allocator based caches act as front-ends to the regions; only
 *   the metadata cache (for kern_packet_t) utilizes the magazines layer.  All
 *   others simply use skmem_cache for slab-based allocations.
 *
 *   Certain regions contain singleton objects that are simple enough to not
 *   require the slab allocator, such as the ones used for statistics and flow
 *   advisories.  Because of this, we directly allocate from those regions
 *   and store the objects in the arena.
 *
 * SKMEM_ARENA_TYPE_NECP:
 *
 *   This arena represents the memory subsystem of an NECP file descriptor
 *   object.  It consists of a memory region for per-flow statistics, as well
 *   as a cache front-end for that region.
 *
 * SKMEM_ARENA_TYPE_SYSTEM:
 *
 *   This arena represents general, system-wide objects.  It currently
 *   consists of the sysctls region that's created once at init time.
 */
/* END CSTYLED */
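
/*
 * Illustrative lifecycle sketch (not part of the build; caller-side names
 * such as "na", "srp", "p" and "ami" are hypothetical).  A nexus adapter
 * owner would typically do something along these lines:
 *
 *	int err;
 *	struct kern_pbufpool *rx_pp = NULL, *tx_pp = NULL;
 *	struct skmem_arena *ar;
 *	struct skmem_arena_mmap_info ami;
 *
 *	ar = skmem_arena_create_for_nexus(na, srp, &tx_pp, &rx_pp,
 *	    FALSE, FALSE, NULL, &err);          // create regions and caches
 *	...
 *	bzero(&ami, sizeof(ami));
 *	err = skmem_arena_mmap(ar, p, &ami);    // map into the task of proc p
 *	...
 *	skmem_arena_munmap(ar, &ami);           // undo the mapping
 *	skmem_arena_release(ar);                // drop the creation reference
 */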

#include <skywalk/os_skywalk_private.h>
#include <net/necp.h>

static void skmem_arena_destroy(struct skmem_arena *);
static void skmem_arena_teardown(struct skmem_arena *, boolean_t);
static int skmem_arena_create_finalize(struct skmem_arena *);
static void skmem_arena_nexus_teardown(struct skmem_arena_nexus *, boolean_t);
static void skmem_arena_necp_teardown(struct skmem_arena_necp *, boolean_t);
static void skmem_arena_system_teardown(struct skmem_arena_system *, boolean_t);
static struct skmem_arena *skmem_arena_alloc(skmem_arena_type_t,
    const char *);
static void skmem_arena_free(struct skmem_arena *);
static void skmem_arena_retain_locked(struct skmem_arena *);
static void skmem_arena_reap_locked(struct skmem_arena *, boolean_t);
static boolean_t skmem_arena_munmap_common(struct skmem_arena *,
    struct skmem_arena_mmap_info *);
#if SK_LOG
static void skmem_arena_create_region_log(struct skmem_arena *);
#endif /* SK_LOG */
static int skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS;

SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, arena,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, skmem_arena_mib_get_sysctl, "S,sk_stats_arena",
    "Skywalk arena statistics");

static LCK_GRP_DECLARE(skmem_arena_lock_grp, "skmem_arena");
static LCK_MTX_DECLARE(skmem_arena_lock, &skmem_arena_lock_grp);

static TAILQ_HEAD(, skmem_arena) skmem_arena_head = TAILQ_HEAD_INITIALIZER(skmem_arena_head);

#define SKMEM_ARENA_LOCK()                      \
	lck_mtx_lock(&skmem_arena_lock)
#define SKMEM_ARENA_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_OWNED)
#define SKMEM_ARENA_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&skmem_arena_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKMEM_ARENA_UNLOCK()                    \
	lck_mtx_unlock(&skmem_arena_lock)

#define AR_NEXUS_SIZE           sizeof(struct skmem_arena_nexus)
static SKMEM_TYPE_DEFINE(ar_nexus_zone, struct skmem_arena_nexus);

#define AR_NECP_SIZE            sizeof(struct skmem_arena_necp)
static SKMEM_TYPE_DEFINE(ar_necp_zone, struct skmem_arena_necp);

#define AR_SYSTEM_SIZE          sizeof(struct skmem_arena_system)
static SKMEM_TYPE_DEFINE(ar_system_zone, struct skmem_arena_system);

#define SKMEM_TAG_ARENA_MIB     "com.apple.skywalk.arena.mib"
static SKMEM_TAG_DEFINE(skmem_tag_arena_mib, SKMEM_TAG_ARENA_MIB);

static_assert(SKMEM_ARENA_TYPE_NEXUS == SAR_TYPE_NEXUS);
static_assert(SKMEM_ARENA_TYPE_NECP == SAR_TYPE_NECP);
static_assert(SKMEM_ARENA_TYPE_SYSTEM == SAR_TYPE_SYSTEM);

SK_NO_INLINE_ATTRIBUTE
static int
skmem_arena_sd_setup(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct skmem_arena *ar,
    boolean_t kernel_only, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region *ksd_skr = NULL, *usd_skr = NULL;
	const char *name = na->na_name;
	char cname[64];
	skmem_region_id_t usd_type, ksd_type;
	int err = 0;

	if (tx) {
		usd_type = SKMEM_REGION_TXAUSD;
		ksd_type = SKMEM_REGION_TXAKSD;
		cachep = &arn->arn_txaksd_cache;
	} else {
		usd_type = SKMEM_REGION_RXFUSD;
		ksd_type = SKMEM_REGION_RXFKSD;
		cachep = &arn->arn_rxfksd_cache;
	}
	ksd_skr = skmem_region_create(name, &srp[ksd_type], NULL, NULL, NULL);
	if (ksd_skr == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
		    "create %s region", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, srp[ksd_type].srp_name);
		err = ENOMEM;
		goto failed;
	}
	ar->ar_regions[ksd_type] = ksd_skr;
	if (!kernel_only) {
		usd_skr = skmem_region_create(name, &srp[usd_type], NULL,
		    NULL, NULL);
		if (usd_skr == NULL) {
			err = ENOMEM;
			goto failed;
		}
		ar->ar_regions[usd_type] = usd_skr;
		skmem_region_mirror(ksd_skr, usd_skr);
	}
	snprintf(cname, sizeof(cname), tx ? "txa_ksd.%s" : "rxf_ksd.%s", name);
	ASSERT(ar->ar_regions[ksd_type] != NULL);
	*cachep = skmem_cache_create(cname,
	    srp[ksd_type].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[ksd_type], SKMEM_CR_NOMAGAZINES);
	if (*cachep == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		err = ENOMEM;
		goto failed;
	}
	return 0;

failed:
	if (ksd_skr != NULL) {
		skmem_region_release(ksd_skr);
		ar->ar_regions[ksd_type] = NULL;
	}
	if (usd_skr != NULL) {
		/*
		 * Decrement the refcnt taken by skmem_region_mirror().
		 * This release isn't needed when skmem_cache_create()
		 * succeeds, because skmem_cache_destroy() does it.
		 */
		skmem_region_release(usd_skr);

		/* decrements the region's own refcnt */
		skmem_region_release(usd_skr);
		ar->ar_regions[usd_type] = NULL;
	}
	return err;
}

SK_NO_INLINE_ATTRIBUTE
static void
skmem_arena_sd_teardown(struct skmem_arena *ar, boolean_t tx)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;
	struct skmem_cache **cachep;
	struct skmem_region **ksd_rp, **usd_rp;

	if (tx) {
		cachep = &arn->arn_txaksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_TXAKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_TXAUSD];
	} else {
		cachep = &arn->arn_rxfksd_cache;
		ksd_rp = &ar->ar_regions[SKMEM_REGION_RXFKSD];
		usd_rp = &ar->ar_regions[SKMEM_REGION_RXFUSD];
	}
	if (*cachep != NULL) {
		skmem_cache_destroy(*cachep);
		*cachep = NULL;
	}
	if (*usd_rp != NULL) {
		skmem_region_release(*usd_rp);
		*usd_rp = NULL;
	}
	if (*ksd_rp != NULL) {
		skmem_region_release(*ksd_rp);
		*ksd_rp = NULL;
	}
}

static bool
skmem_arena_pp_setup(struct skmem_arena *ar,
    struct skmem_region_params srp[SKMEM_REGIONS], const char *name,
    struct kern_pbufpool *rx_pp, struct kern_pbufpool *tx_pp,
    boolean_t kernel_only, boolean_t pp_truncated_buf)
{
	struct skmem_arena_nexus *arn = (struct skmem_arena_nexus *)ar;

	if (rx_pp == NULL && tx_pp == NULL) {
		uint32_t ppcreatef = 0;
		if (pp_truncated_buf) {
			ppcreatef |= PPCREATEF_TRUNCATED_BUF;
		}
		if (kernel_only) {
			ppcreatef |= PPCREATEF_KERNEL_ONLY;
		}
		if (srp[SKMEM_REGION_KMD].srp_max_frags > 1) {
			ppcreatef |= PPCREATEF_ONDEMAND_BUF;
		}
		/* callee retains pp upon success */
		rx_pp = pp_create(name, srp, NULL, NULL, NULL, NULL, NULL,
		    ppcreatef);
		if (rx_pp == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create pp",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
			return false;
		}
		pp_retain(rx_pp);
		tx_pp = rx_pp;
	} else {
		if (rx_pp == NULL) {
			rx_pp = tx_pp;
		} else if (tx_pp == NULL) {
			tx_pp = rx_pp;
		}

		ASSERT(rx_pp->pp_md_type == tx_pp->pp_md_type);
		ASSERT(rx_pp->pp_md_subtype == tx_pp->pp_md_subtype);
		ASSERT(!(!kernel_only &&
		    (PP_KERNEL_ONLY(rx_pp) || (PP_KERNEL_ONLY(tx_pp)))));
		arn->arn_mode |= AR_NEXUS_MODE_EXTERNAL_PPOOL;
		pp_retain(rx_pp);
		pp_retain(tx_pp);
	}

	arn->arn_rx_pp = rx_pp;
	arn->arn_tx_pp = tx_pp;
	if (rx_pp == tx_pp) {
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_KMD] = rx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_RXKMD] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKMD] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_KBFT] =
			    rx_pp->pp_kbft_region;
		}
		ar->ar_regions[SKMEM_REGION_RXKBFT] = NULL;
		ar->ar_regions[SKMEM_REGION_TXKBFT] = NULL;
	} else {
		ASSERT(kernel_only); /* split userspace pools not supported */
		ar->ar_regions[SKMEM_REGION_BUF_DEF] = NULL;
		ar->ar_regions[SKMEM_REGION_BUF_LARGE] = NULL;
		skmem_region_retain(PP_BUF_REGION_DEF(rx_pp));
		ar->ar_regions[SKMEM_REGION_RXBUF_DEF] =
		    PP_BUF_REGION_DEF(rx_pp);
		ar->ar_regions[SKMEM_REGION_RXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(rx_pp);
		if (PP_BUF_REGION_LARGE(rx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(rx_pp));
		}
		skmem_region_retain(PP_BUF_REGION_DEF(tx_pp));
		ar->ar_regions[SKMEM_REGION_TXBUF_DEF] =
		    PP_BUF_REGION_DEF(tx_pp);
		ar->ar_regions[SKMEM_REGION_TXBUF_LARGE] =
		    PP_BUF_REGION_LARGE(tx_pp);
		if (PP_BUF_REGION_LARGE(tx_pp) != NULL) {
			skmem_region_retain(PP_BUF_REGION_LARGE(tx_pp));
		}
		ar->ar_regions[SKMEM_REGION_KMD] = NULL;
		skmem_region_retain(rx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_RXKMD] = rx_pp->pp_kmd_region;
		skmem_region_retain(tx_pp->pp_kmd_region);
		ar->ar_regions[SKMEM_REGION_TXKMD] = tx_pp->pp_kmd_region;
		ar->ar_regions[SKMEM_REGION_KBFT] = NULL;
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(rx_pp));
			skmem_region_retain(rx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_RXKBFT] =
			    rx_pp->pp_kbft_region;
		}
		if (tx_pp->pp_kbft_region != NULL) {
			ASSERT(PP_HAS_BUFFER_ON_DEMAND(tx_pp));
			skmem_region_retain(tx_pp->pp_kbft_region);
			ar->ar_regions[SKMEM_REGION_TXKBFT] =
			    tx_pp->pp_kbft_region;
		}
	}

	if (kernel_only) {
		if ((arn->arn_mode & AR_NEXUS_MODE_EXTERNAL_PPOOL) == 0) {
			ASSERT(PP_KERNEL_ONLY(rx_pp));
			ASSERT(PP_KERNEL_ONLY(tx_pp));
			ASSERT(rx_pp->pp_umd_region == NULL);
			ASSERT(tx_pp->pp_umd_region == NULL);
			ASSERT(rx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(tx_pp->pp_kmd_region->skr_mirror == NULL);
			ASSERT(rx_pp->pp_ubft_region == NULL);
			ASSERT(tx_pp->pp_ubft_region == NULL);
			if (rx_pp->pp_kbft_region != NULL) {
				ASSERT(rx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
			if (tx_pp->pp_kbft_region != NULL) {
				ASSERT(tx_pp->pp_kbft_region->skr_mirror ==
				    NULL);
			}
		}
	} else {
		ASSERT(rx_pp == tx_pp);
		ASSERT(!PP_KERNEL_ONLY(rx_pp));
		ASSERT(rx_pp->pp_umd_region->skr_mode & SKR_MODE_MIRRORED);
		ASSERT(rx_pp->pp_kmd_region->skr_mirror != NULL);
		ar->ar_regions[SKMEM_REGION_UMD] = rx_pp->pp_umd_region;
		skmem_region_retain(rx_pp->pp_umd_region);
		if (rx_pp->pp_kbft_region != NULL) {
			ASSERT(rx_pp->pp_kbft_region->skr_mirror != NULL);
			ASSERT(rx_pp->pp_ubft_region != NULL);
			ASSERT(rx_pp->pp_ubft_region->skr_mode &
			    SKR_MODE_MIRRORED);
			ar->ar_regions[SKMEM_REGION_UBFT] =
			    rx_pp->pp_ubft_region;
			skmem_region_retain(rx_pp->pp_ubft_region);
		}
	}

	arn->arn_md_type = rx_pp->pp_md_type;
	arn->arn_md_subtype = rx_pp->pp_md_subtype;
	return true;
}

/*
 * Create a nexus adapter arena.
 */
struct skmem_arena *
skmem_arena_create_for_nexus(const struct nexus_adapter *na,
    struct skmem_region_params srp[SKMEM_REGIONS], struct kern_pbufpool **tx_pp,
    struct kern_pbufpool **rx_pp, boolean_t pp_truncated_buf,
    boolean_t kernel_only, struct kern_nexus_advisory *nxv, int *perr)
{
#define SRP_CFLAGS(_id)         (srp[_id].srp_cflags)
	struct skmem_arena_nexus *arn;
	struct skmem_arena *ar;
	char cname[64];
	uint32_t i;
	const char *name = na->na_name;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NEXUS, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NEXUS_SIZE);
	arn = (struct skmem_arena_nexus *)ar;

	/* these regions must not be readable/writeable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_GUARD);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_GUARD);

	/* these regions must be read-only */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_UREADONLY);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_UREADONLY);
	if ((na->na_flags & NAF_USER_PKT_POOL) == 0) {
		ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY);
		ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY);
	} else {
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) &
		    SKMEM_REGION_CR_UREADONLY));
		ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) &
		    SKMEM_REGION_CR_UREADONLY));
	}

	/* these regions must be user-mappable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_MMAPOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_MMAPOK);

	/* these must not be user-mappable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_MMAPOK));

	/* these regions must be shareable */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_BUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_RXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_DEF) & SKMEM_REGION_CR_SHAREOK);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_TXBUF_LARGE) & SKMEM_REGION_CR_SHAREOK);

	/* these regions must not be shareable */
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RING) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_UBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFUSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_USTATS) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_FLOWADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_NEXUSADV) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKMD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXKBFT) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_TXAKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_RXFKSD) & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(SRP_CFLAGS(SKMEM_REGION_KSTATS) & SKMEM_REGION_CR_SHAREOK));

	/* these must stay active */
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_HEAD) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_SCHEMA) & SKMEM_REGION_CR_NOREDIRECT);
	ASSERT(SRP_CFLAGS(SKMEM_REGION_GUARD_TAIL) & SKMEM_REGION_CR_NOREDIRECT);

	/* no kstats for nexus */
	ASSERT(srp[SKMEM_REGION_KSTATS].srp_c_obj_cnt == 0);

	AR_LOCK(ar);
	if (!skmem_arena_pp_setup(ar, srp, name, (rx_pp ? *rx_pp : NULL),
	    (tx_pp ? *tx_pp : NULL), kernel_only, pp_truncated_buf)) {
		goto failed;
	}

	if (nxv != NULL && nxv->nxv_reg != NULL) {
		struct skmem_region *svr = nxv->nxv_reg;

		ASSERT(svr->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(svr->skr_seg_max_cnt == 1);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = svr;
		skmem_region_retain(svr);

		ASSERT(nxv->nxv_adv != NULL);
		if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
			VERIFY(nxv->flowswitch_nxv_adv->nxadv_ver ==
			    NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
		} else if (nxv->nxv_adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
			VERIFY(nxv->netif_nxv_adv->nna_version ==
			    NX_NETIF_ADVISORY_CURRENT_VERSION);
		} else {
			panic_plain("%s: invalid advisory type %d",
			    __func__, nxv->nxv_adv_type);
			/* NOTREACHED */
		}
		arn->arn_nexusadv_obj = nxv->nxv_adv;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		ASSERT(srp[SKMEM_REGION_NEXUSADV].srp_c_obj_cnt == 0);
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, TRUE) != 0) {
		goto failed;
	}

	if (skmem_arena_sd_setup(na, srp, ar, kernel_only, FALSE) != 0) {
		goto failed;
	}

	for (i = 0; i < SKMEM_REGIONS; i++) {
		/* skip if already created */
		if (ar->ar_regions[i] != NULL) {
			continue;
		}

		/* skip external regions from packet pool */
		if (skmem_region_for_pp(i)) {
			continue;
		}

		/* skip slot descriptor regions */
		if (i == SKMEM_REGION_TXAUSD || i == SKMEM_REGION_RXFUSD ||
		    i == SKMEM_REGION_TXAKSD || i == SKMEM_REGION_RXFKSD) {
			continue;
		}

		/* skip if region is configured to be empty */
		if (srp[i].srp_c_obj_cnt == 0) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		ASSERT(srp[i].srp_id == i);

		/*
		 * Skip {SCHEMA, RING, GUARD} for kernel-only arena.  Note
		 * that this assumes a kernel-only arena is always used
		 * for kernel-only nexus adapters (never used directly by
		 * a user process).
		 *
		 * XXX [email protected] - see comments in kern_pbufpool_create().
		 * We need to revisit this logic for "direct channel" access,
		 * perhaps via a separate adapter flag.
		 */
		if (kernel_only && (i == SKMEM_REGION_GUARD_HEAD ||
		    i == SKMEM_REGION_SCHEMA || i == SKMEM_REGION_RING ||
		    i == SKMEM_REGION_GUARD_TAIL)) {
			continue;
		}

		/* not for nexus, or for us to create here */
		ASSERT(i != SKMEM_REGION_GUARD_HEAD || sk_guard);
		ASSERT(i != SKMEM_REGION_NEXUSADV);
		ASSERT(i != SKMEM_REGION_SYSCTLS);
		ASSERT(i != SKMEM_REGION_GUARD_TAIL || sk_guard);
		ASSERT(i != SKMEM_REGION_KSTATS);
		ASSERT(i != SKMEM_REGION_INTRINSIC);

		/* otherwise create it */
		if ((ar->ar_regions[i] = skmem_region_create(name, &srp[i],
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to "
			    "create %s region", ar->ar_name, SK_KVA(ar),
			    ar->ar_flags, ARF_BITS, srp[i].srp_name);
			goto failed;
		}
	}

	/* create skmem_cache for schema (without magazines) */
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL || kernel_only);
	if (ar->ar_regions[SKMEM_REGION_SCHEMA] != NULL) {
		(void) snprintf(cname, sizeof(cname), "schema.%s", name);
		if ((arn->arn_schema_cache = skmem_cache_create(cname,
		    srp[SKMEM_REGION_SCHEMA].srp_c_obj_size, 0, NULL, NULL,
		    NULL, NULL, ar->ar_regions[SKMEM_REGION_SCHEMA],
		    SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
			    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
			    cname);
			goto failed;
		}
	}

	/* create skmem_cache for rings (without magazines) */
	(void) snprintf(cname, sizeof(cname), "ring.%s", name);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] != NULL || kernel_only);
	if ((ar->ar_regions[SKMEM_REGION_RING] != NULL) &&
	    (arn->arn_ring_cache = skmem_cache_create(cname,
	    srp[SKMEM_REGION_RING].srp_c_obj_size, 0, NULL, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_RING], SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	/*
	 * If the stats region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_USTATS] != NULL) {
		struct skmem_region *str = ar->ar_regions[SKMEM_REGION_USTATS];

		/* no kstats for nexus */
		ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_stats_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "stats", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);

	/*
	 * If the flowadv region is present, allocate a single object directly
	 * from the region; we don't need to create an skmem_cache for this,
	 * as the object is allocated (and freed) only once.
	 */
	if (ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL) {
		struct skmem_region *str =
		    ar->ar_regions[SKMEM_REGION_FLOWADV];

		ASSERT(str->skr_cflags & SKMEM_REGION_CR_MONOLITHIC);
		ASSERT(str->skr_seg_max_cnt == 1);

		if ((arn->arn_flowadv_obj = skmem_region_alloc(str, NULL,
		    NULL, NULL, SKMEM_SLEEP)) == NULL) {
			SK_ERR("\"%s\" ar 0x%llx flags %b failed to alloc "
			    "flowadv", ar->ar_name, SK_KVA(ar), ar->ar_flags,
			    ARF_BITS);
			goto failed;
		}
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

	/* caller didn't give us one, but would like us to return it? */
	if (rx_pp != NULL && *rx_pp == NULL) {
		*rx_pp = arn->arn_rx_pp;
		pp_retain(*rx_pp);
	}
	if (tx_pp != NULL && *tx_pp == NULL) {
		*tx_pp = arn->arn_tx_pp;
		pp_retain(*tx_pp);  /* for caller */
	}

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
#undef SRP_CFLAGS
}

void
skmem_arena_nexus_sd_set_noidle(struct skmem_arena_nexus *arn, int cnt)
{
	struct skmem_arena *ar = &arn->arn_cmn;

	AR_LOCK(ar);
	arn->arn_ksd_nodefunct += cnt;
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	AR_UNLOCK(ar);
}

boolean_t
skmem_arena_nexus_sd_idle(struct skmem_arena_nexus *arn)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	boolean_t idle;

	AR_LOCK(ar);
	VERIFY(arn->arn_ksd_nodefunct >= 0);
	idle = (arn->arn_ksd_nodefunct == 0);
	AR_UNLOCK(ar);

	return idle;
}

static void
skmem_arena_nexus_teardown(struct skmem_arena_nexus *arn, boolean_t defunct)
{
	struct skmem_arena *ar = &arn->arn_cmn;
	struct skmem_region *skr;
	int i;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* these should never be set for nexus arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL || sk_guard);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	if (arn->arn_stats_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_stats_obj, NULL);
		arn->arn_stats_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(arn->arn_stats_obj == NULL);

	if (arn->arn_flowadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_FLOWADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_free(skr, arn->arn_flowadv_obj, NULL);
		arn->arn_flowadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_FLOWADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(arn->arn_flowadv_obj == NULL);

	if (arn->arn_nexusadv_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_NEXUSADV];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so just nullify it */
		arn->arn_nexusadv_obj = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_NEXUSADV] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(arn->arn_nexusadv_obj == NULL);

	ASSERT(!((arn->arn_rx_pp == NULL) ^ (arn->arn_tx_pp == NULL)));
	if (arn->arn_rx_pp != NULL) {
		for (i = 0; i < SKMEM_PP_REGIONS; i++) {
			skmem_region_id_t reg = skmem_pp_region_ids[i];
			skr = ar->ar_regions[reg];
			if (skr != NULL) {
				ASSERT(!(skr->skr_mode & SKR_MODE_NOREDIRECT));
				skmem_region_release(skr);
				ar->ar_regions[reg] = NULL;
			}
		}
		pp_release(arn->arn_rx_pp);
		pp_release(arn->arn_tx_pp);
		arn->arn_rx_pp = NULL;
		arn->arn_tx_pp = NULL;
	}
	for (i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}
	ASSERT(arn->arn_rx_pp == NULL);
	ASSERT(arn->arn_tx_pp == NULL);

	if (arn->arn_ring_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_RING];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_ring_cache);
		arn->arn_ring_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_RING] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(arn->arn_ring_cache == NULL);

	/*
	 * Stop here if we're in the defunct context, and we're asked
	 * to keep the slot descriptor regions alive as they are still
	 * being referred to by the nexus owner (driver).
	 */
	if (defunct && arn->arn_ksd_nodefunct != 0) {
		ASSERT(arn->arn_ksd_nodefunct > 0);
		return;
	}

	ASSERT(arn->arn_ksd_nodefunct == 0);
	skmem_arena_sd_teardown(ar, TRUE);
	skmem_arena_sd_teardown(ar, FALSE);

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}
	if (arn->arn_schema_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SCHEMA];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arn->arn_schema_cache);
		arn->arn_schema_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SCHEMA] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(arn->arn_schema_cache == NULL);

	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_HEAD]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_HEAD] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	if ((skr = ar->ar_regions[SKMEM_REGION_GUARD_TAIL]) != NULL) {
		ASSERT(skr->skr_mode & SKR_MODE_NOREDIRECT);
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_GUARD_TAIL] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
}

/*
 * Create an NECP arena.
 */
struct skmem_arena *
skmem_arena_create_for_necp(const char *name,
    struct skmem_region_params *srp_ustats,
    struct skmem_region_params *srp_kstats, int *perr)
{
	struct skmem_arena_necp *arc;
	struct skmem_arena *ar;
	char cname[64];

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_NECP, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_NECP_SIZE);
	arc = (struct skmem_arena_necp *)ar;

	/*
	 * Must be stats region, and must be user-mappable;
	 * don't assert for SKMEM_REGION_CR_MONOLITHIC here
	 * as the client might want multi-segment mode.
	 */
	ASSERT(srp_ustats->srp_id == SKMEM_REGION_USTATS);
	ASSERT(srp_kstats->srp_id == SKMEM_REGION_KSTATS);
	ASSERT(srp_ustats->srp_cflags & SKMEM_REGION_CR_MMAPOK);
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_MMAPOK));
	ASSERT(!(srp_ustats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(!(srp_kstats->srp_cflags & SKMEM_REGION_CR_SHAREOK));
	ASSERT(srp_ustats->srp_c_obj_size != 0);
	ASSERT(srp_kstats->srp_c_obj_size != 0);
	ASSERT(srp_ustats->srp_c_obj_cnt != 0);
	ASSERT(srp_kstats->srp_c_obj_cnt != 0);
	ASSERT(srp_ustats->srp_c_seg_size == srp_kstats->srp_c_seg_size);
	ASSERT(srp_ustats->srp_seg_cnt == srp_kstats->srp_seg_cnt);
	ASSERT(srp_ustats->srp_c_obj_size == srp_kstats->srp_c_obj_size);
	ASSERT(srp_ustats->srp_c_obj_cnt == srp_kstats->srp_c_obj_cnt);

	AR_LOCK(ar);

	if ((ar->ar_regions[SKMEM_REGION_USTATS] = skmem_region_create(name,
	    srp_ustats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_ustats->srp_name);
		goto failed;
	}

	if ((ar->ar_regions[SKMEM_REGION_KSTATS] = skmem_region_create(name,
	    srp_kstats, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s region",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
		    srp_kstats->srp_name);
		goto failed;
	}

	skmem_region_mirror(ar->ar_regions[SKMEM_REGION_KSTATS],
	    ar->ar_regions[SKMEM_REGION_USTATS]);

	/* create skmem_cache for kernel stats (without magazines) */
	(void) snprintf(cname, sizeof(cname), "kstats.%s", name);
	if ((arc->arc_kstats_cache = skmem_cache_create(cname,
	    srp_kstats->srp_c_obj_size, 0, necp_stats_ctor, NULL, NULL, NULL,
	    ar->ar_regions[SKMEM_REGION_KSTATS],
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create %s",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS, cname);
		goto failed;
	}

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for NECP arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_necp_teardown(struct skmem_arena_necp *arc, boolean_t defunct)
{
#pragma unused(defunct)
	struct skmem_arena *ar = &arc->arc_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NECP);

	/* these must never be configured for NECP arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	if (arc->arc_kstats_cache != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_KSTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_cache_destroy(arc->arc_kstats_cache);
		arc->arc_kstats_cache = NULL;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_KSTATS] = NULL;

		skr = ar->ar_regions[SKMEM_REGION_USTATS];
		ASSERT(skr != NULL && !(skr->skr_mode & SKR_MODE_NOREDIRECT));
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_USTATS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(arc->arc_kstats_cache == NULL);
}

/*
 * Given an arena, return its NECP variant (if applicable).
 */
struct skmem_arena_necp *
skmem_arena_necp(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_NECP)) {
		return NULL;
	}

	return (struct skmem_arena_necp *)ar;
}

/*
 * Create a System arena.
 */
struct skmem_arena *
skmem_arena_create_for_system(const char *name, int *perr)
{
	struct skmem_region *skrsys;
	struct skmem_arena_system *ars;
	struct skmem_arena *ar;

	*perr = 0;

	ar = skmem_arena_alloc(SKMEM_ARENA_TYPE_SYSTEM, name);
	ASSERT(ar != NULL && ar->ar_zsize == AR_SYSTEM_SIZE);
	ars = (struct skmem_arena_system *)ar;

	AR_LOCK(ar);
	/* retain system-wide sysctls region */
	skrsys = skmem_get_sysctls_region();
	ASSERT(skrsys != NULL && skrsys->skr_id == SKMEM_REGION_SYSCTLS);
	ASSERT((skrsys->skr_mode & (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES |
	    SKR_MODE_KREADONLY | SKR_MODE_UREADONLY | SKR_MODE_MONOLITHIC |
	    SKR_MODE_SHAREOK)) ==
	    (SKR_MODE_MMAPOK | SKR_MODE_NOMAGAZINES | SKR_MODE_UREADONLY |
	    SKR_MODE_MONOLITHIC));
	ar->ar_regions[SKMEM_REGION_SYSCTLS] = skrsys;
	skmem_region_retain(skrsys);

	/* object is valid as long as the sysctls region is retained */
	ars->ars_sysctls_obj = skmem_get_sysctls_obj(&ars->ars_sysctls_objsize);
	ASSERT(ars->ars_sysctls_obj != NULL);
	ASSERT(ars->ars_sysctls_objsize != 0);

	if (skmem_arena_create_finalize(ar) != 0) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to finalize",
		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);
		goto failed;
	}

	/*
	 * These must never be configured for system arena.
	 *
	 * XXX: In theory we can add guard pages to this arena,
	 * but for now leave that as an exercise for the future.
	 */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* these must be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] != NULL);

	++ar->ar_refcnt;        /* for caller */
	AR_UNLOCK(ar);

	SKMEM_ARENA_LOCK();
	TAILQ_INSERT_TAIL(&skmem_arena_head, ar, ar_link);
	SKMEM_ARENA_UNLOCK();

#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		skmem_arena_create_region_log(ar);
	}
#endif /* SK_LOG */

	return ar;

failed:
	AR_LOCK_ASSERT_HELD(ar);
	skmem_arena_destroy(ar);
	*perr = ENOMEM;

	return NULL;
}

static void
skmem_arena_system_teardown(struct skmem_arena_system *ars, boolean_t defunct)
{
	struct skmem_arena *ar = &ars->ars_cmn;
	struct skmem_region *skr;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);

	/* these must never be configured for system arena */
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_HEAD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_SCHEMA] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RING] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFUSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_GUARD_TAIL] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_TXAKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_RXFKSD] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_KSTATS] == NULL);
	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);
	for (int i = 0; i < SKMEM_PP_REGIONS; i++) {
		ASSERT(ar->ar_regions[skmem_pp_region_ids[i]] == NULL);
	}

	/* nothing to do here for now during defunct, just return */
	if (defunct) {
		return;
	}

	if (ars->ars_sysctls_obj != NULL) {
		skr = ar->ar_regions[SKMEM_REGION_SYSCTLS];
		ASSERT(skr != NULL && (skr->skr_mode & SKR_MODE_NOREDIRECT));
		/* we didn't allocate this, so don't free it */
		ars->ars_sysctls_obj = NULL;
		ars->ars_sysctls_objsize = 0;
		skmem_region_release(skr);
		ar->ar_regions[SKMEM_REGION_SYSCTLS] = NULL;
	}
	ASSERT(ar->ar_regions[SKMEM_REGION_SYSCTLS] == NULL);
	ASSERT(ars->ars_sysctls_obj == NULL);
	ASSERT(ars->ars_sysctls_objsize == 0);
}

/*
 * Given an arena, return its System variant (if applicable).
 */
struct skmem_arena_system *
skmem_arena_system(struct skmem_arena *ar)
{
	if (__improbable(ar->ar_type != SKMEM_ARENA_TYPE_SYSTEM)) {
		return NULL;
	}

	return (struct skmem_arena_system *)ar;
}

void *
skmem_arena_system_sysctls_obj_addr(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_obj;
}

size_t
skmem_arena_system_sysctls_obj_size(struct skmem_arena *ar)
{
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_SYSTEM);
	return skmem_arena_system(ar)->ars_sysctls_objsize;
}

/*
 * Destroy an arena.
 */
static void
skmem_arena_destroy(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);

	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags %b",
	    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS);

	ASSERT(ar->ar_refcnt == 0);
	if (ar->ar_link.tqe_next != NULL || ar->ar_link.tqe_prev != NULL) {
		AR_UNLOCK(ar);
		SKMEM_ARENA_LOCK();
		TAILQ_REMOVE(&skmem_arena_head, ar, ar_link);
		SKMEM_ARENA_UNLOCK();
		AR_LOCK(ar);
		ASSERT(ar->ar_refcnt == 0);
	}

	/* teardown all remaining memory regions and associated resources */
	skmem_arena_teardown(ar, FALSE);

	if (ar->ar_ar != NULL) {
		IOSKArenaDestroy(ar->ar_ar);
		ar->ar_ar = NULL;
	}

	if (ar->ar_flags & ARF_ACTIVE) {
		ar->ar_flags &= ~ARF_ACTIVE;
	}

	AR_UNLOCK(ar);

	skmem_arena_free(ar);
}

/*
 * Teardown (or defunct) an arena.
 */
static void
skmem_arena_teardown(struct skmem_arena *ar, boolean_t defunct)
{
	uint32_t i;

	switch (ar->ar_type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		skmem_arena_nexus_teardown((struct skmem_arena_nexus *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_NECP:
		skmem_arena_necp_teardown((struct skmem_arena_necp *)ar,
		    defunct);
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		skmem_arena_system_teardown((struct skmem_arena_system *)ar,
		    defunct);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* stop here if we're in the defunct context */
	if (defunct) {
		return;
	}

	/* take care of any remaining ones */
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			continue;
		}

		skmem_region_release(ar->ar_regions[i]);
		ar->ar_regions[i] = NULL;
	}
}

static int
skmem_arena_create_finalize(struct skmem_arena *ar)
{
	IOSKRegionRef reg[SKMEM_REGIONS];
	uint32_t i, regcnt = 0;
	int err = 0;

	AR_LOCK_ASSERT_HELD(ar);

	ASSERT(ar->ar_regions[SKMEM_REGION_INTRINSIC] == NULL);

	/*
	 * Prepare an array of regions that can be mapped to user task;
	 * exclude regions that aren't eligible for user task mapping.
	 */
	bzero(&reg, sizeof(reg));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		struct skmem_region *skr = ar->ar_regions[i];
		if (skr == NULL || !(skr->skr_mode & SKR_MODE_MMAPOK)) {
			continue;
		}

		ASSERT(skr->skr_reg != NULL);
		reg[regcnt++] = skr->skr_reg;
	}
	ASSERT(regcnt != 0);

	/*
	 * Create backing IOSKArena handle.
	 */
	ar->ar_ar = IOSKArenaCreate(reg, (IOSKCount)regcnt);
	if (ar->ar_ar == NULL) {
		SK_ERR("\"%s\" ar 0x%llx flags %b failed to create "
		    "IOSKArena of %u regions", ar->ar_name, SK_KVA(ar),
		    ar->ar_flags, ARF_BITS, regcnt);
		err = ENOMEM;
		goto failed;
	}

	ar->ar_flags |= ARF_ACTIVE;

failed:
	return err;
}

static inline struct kalloc_type_view *
skmem_arena_zone(skmem_arena_type_t type)
{
	switch (type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		return ar_nexus_zone;

	case SKMEM_ARENA_TYPE_NECP:
		return ar_necp_zone;

	case SKMEM_ARENA_TYPE_SYSTEM:
		return ar_system_zone;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static struct skmem_arena *
skmem_arena_alloc(skmem_arena_type_t type, const char *name)
{
	const char *ar_str = NULL;
	struct skmem_arena *ar;
	size_t ar_zsize = 0;

	switch (type) {
	case SKMEM_ARENA_TYPE_NEXUS:
		ar_zsize = AR_NEXUS_SIZE;
		ar_str = "nexus";
		break;

	case SKMEM_ARENA_TYPE_NECP:
		ar_zsize = AR_NECP_SIZE;
		ar_str = "necp";
		break;

	case SKMEM_ARENA_TYPE_SYSTEM:
		ar_zsize = AR_SYSTEM_SIZE;
		ar_str = "system";
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	ar = zalloc_flags(skmem_arena_zone(type), Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ar->ar_type = type;
	ar->ar_zsize = ar_zsize;

	lck_mtx_init(&ar->ar_lock, &skmem_arena_lock_grp,
	    LCK_ATTR_NULL);
	(void) snprintf(ar->ar_name, sizeof(ar->ar_name),
	    "%s.%s.%s", SKMEM_ARENA_PREFIX, ar_str, name);

	return ar;
}

static void
skmem_arena_free(struct skmem_arena *ar)
{
#if DEBUG || DEVELOPMENT
	ASSERT(ar->ar_refcnt == 0);
	ASSERT(!(ar->ar_flags & ARF_ACTIVE));
	ASSERT(ar->ar_ar == NULL);
	ASSERT(ar->ar_mapcnt == 0);
	ASSERT(SLIST_EMPTY(&ar->ar_map_head));
	for (uint32_t i = 0; i < SKMEM_REGIONS; i++) {
		ASSERT(ar->ar_regions[i] == NULL);
	}
#endif /* DEBUG || DEVELOPMENT */

	lck_mtx_destroy(&ar->ar_lock, &skmem_arena_lock_grp);
	zfree(skmem_arena_zone(ar->ar_type), ar);
}

/*
 * Retain an arena.
 */
__attribute__((always_inline))
static inline void
skmem_arena_retain_locked(struct skmem_arena *ar)
{
	AR_LOCK_ASSERT_HELD(ar);
	ar->ar_refcnt++;
	ASSERT(ar->ar_refcnt != 0);
}

void
skmem_arena_retain(struct skmem_arena *ar)
{
	AR_LOCK(ar);
	skmem_arena_retain_locked(ar);
	AR_UNLOCK(ar);
}

/*
 * Release (and potentially destroy) an arena.
 */
__attribute__((always_inline))
static inline boolean_t
skmem_arena_release_locked(struct skmem_arena *ar)
{
	boolean_t lastref = FALSE;

	AR_LOCK_ASSERT_HELD(ar);
	ASSERT(ar->ar_refcnt != 0);
	if (--ar->ar_refcnt == 0) {
		skmem_arena_destroy(ar);
		lastref = TRUE;
	} else {
		lastref = FALSE;
	}

	return lastref;
}

1482 boolean_t
1483 skmem_arena_release(struct skmem_arena *ar)
1484 {
1485 	boolean_t lastref;
1486 
1487 	AR_LOCK(ar);
1488 	/* unlock only if this isn't the last reference */
1489 	if (!(lastref = skmem_arena_release_locked(ar))) {
1490 		AR_UNLOCK(ar);
1491 	}
1492 
1493 	return lastref;
1494 }
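
/*
 * Reference-counting sketch (hypothetical caller, for illustration
 * only): on the last release the arena is destroyed before the lock
 * would be dropped, so the pointer must not be touched afterwards.
 *
 *	skmem_arena_retain(ar);
 *	// ... use ar ...
 *	if (skmem_arena_release(ar)) {
 *		// last reference: arena has been destroyed
 *		ar = NULL;
 *	}
 */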
1495 
1496 /*
1497  * Map an arena to the task's address space.
1498  */
1499 int
1500 skmem_arena_mmap(struct skmem_arena *ar, struct proc *p,
1501     struct skmem_arena_mmap_info *ami)
1502 {
1503 	task_t task = proc_task(p);
1504 	IOReturn ioerr;
1505 	int err = 0;
1506 
1507 	ASSERT(task != kernel_task && task != TASK_NULL);
1508 	ASSERT(ami->ami_arena == NULL);
1509 	ASSERT(ami->ami_mapref == NULL);
1510 	ASSERT(ami->ami_maptask == TASK_NULL);
1511 	ASSERT(!ami->ami_redirect);
1512 
1513 	AR_LOCK(ar);
1514 	if ((ar->ar_flags & (ARF_ACTIVE | ARF_DEFUNCT)) != ARF_ACTIVE) {
1515 		err = ENODEV;
1516 		goto failed;
1517 	}
1518 
1519 	ASSERT(ar->ar_ar != NULL);
1520 	if ((ami->ami_mapref = IOSKMapperCreate(ar->ar_ar, task)) == NULL) {
1521 		err = ENOMEM;
1522 		goto failed;
1523 	}
1524 
1525 	ioerr = IOSKMapperGetAddress(ami->ami_mapref, &ami->ami_mapaddr,
1526 	    &ami->ami_mapsize);
1527 	VERIFY(ioerr == kIOReturnSuccess);
1528 
1529 	ami->ami_arena = ar;
1530 	skmem_arena_retain_locked(ar);
1531 	SLIST_INSERT_HEAD(&ar->ar_map_head, ami, ami_link);
1532 
1533 	ami->ami_maptask = task;
1534 	ar->ar_mapcnt++;
1535 	if (ar->ar_mapcnt == 1) {
1536 		ar->ar_mapsize = ami->ami_mapsize;
1537 	}
1538 
1539 	ASSERT(ami->ami_mapref != NULL);
1540 	ASSERT(ami->ami_arena == ar);
1541 	AR_UNLOCK(ar);
1542 
1543 	return 0;
1544 
1545 failed:
1546 	AR_UNLOCK(ar);
1547 	skmem_arena_munmap(ar, ami);
1548 	VERIFY(err != 0);
1549 
1550 	return err;
1551 }
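
/*
 * Typical call sequence (sketch, assuming an `ami' embedded in some
 * consumer structure; error handling elided):
 *
 *	struct skmem_arena_mmap_info ami;
 *
 *	bzero(&ami, sizeof(ami));
 *	if ((err = skmem_arena_mmap(ar, p, &ami)) == 0) {
 *		// [ami_mapaddr, ami_mapaddr + ami_mapsize) now spans
 *		// the arena in the task's address space
 *		...
 *		skmem_arena_munmap(ar, &ami);
 *	}
 *
 * Note that on failure skmem_arena_mmap() itself undoes any partial
 * state via skmem_arena_munmap(), so the caller need not clean up.
 */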
1552 
1553 /*
1554  * Remove arena's memory mapping from task's address space (common code).
1555  * Returns true if caller needs to perform a deferred defunct.
1556  */
1557 static boolean_t
1558 skmem_arena_munmap_common(struct skmem_arena *ar,
1559     struct skmem_arena_mmap_info *ami)
1560 {
1561 	boolean_t need_defunct = FALSE;
1562 
1563 	AR_LOCK(ar);
1564 	if (ami->ami_mapref != NULL) {
1565 		IOSKMapperDestroy(ami->ami_mapref);
1566 		ami->ami_mapref = NULL;
1567 
1568 		VERIFY(ar->ar_mapcnt != 0);
1569 		ar->ar_mapcnt--;
1570 		if (ar->ar_mapcnt == 0) {
1571 			ar->ar_mapsize = 0;
1572 		}
1573 
1574 		VERIFY(ami->ami_arena == ar);
1575 		SLIST_REMOVE(&ar->ar_map_head, ami, skmem_arena_mmap_info,
1576 		    ami_link);
1577 
1578 		/*
1579 		 * We expect the caller to hold an extra reference
1580 		 * on the arena, in addition to the one in mmap_info.
1581 		 */
1582 		VERIFY(ar->ar_refcnt > 1);
1583 		(void) skmem_arena_release_locked(ar);
1584 		ami->ami_arena = NULL;
1585 
1586 		if (ami->ami_redirect) {
1587 			/*
1588 			 * This mapper has been redirected; decrement
1589 			 * the redirect count associated with it.
1590 			 */
1591 			VERIFY(ar->ar_maprdrcnt != 0);
1592 			ar->ar_maprdrcnt--;
1593 		} else if (ar->ar_maprdrcnt != 0 &&
1594 		    ar->ar_maprdrcnt == ar->ar_mapcnt) {
1595 			/*
1596 			 * There are other mappers for this arena that have
1597 			 * all been redirected, but the arena wasn't marked
1598 			 * inactive by skmem_arena_redirect() last time since
1599 			 * this particular mapper that we just destroyed
1600 			 * was using it.  Now that it's gone, finish the
1601 			 * postponed work once we return to the caller.
1602 			 */
1603 			ASSERT(ar->ar_flags & ARF_ACTIVE);
1604 			ar->ar_flags &= ~ARF_ACTIVE;
1605 			need_defunct = TRUE;
1606 		}
1607 	}
1608 	ASSERT(ami->ami_mapref == NULL);
1609 	ASSERT(ami->ami_arena == NULL);
1610 
1611 	ami->ami_maptask = TASK_NULL;
1612 	ami->ami_mapaddr = 0;
1613 	ami->ami_mapsize = 0;
1614 	ami->ami_redirect = FALSE;
1615 
1616 	AR_UNLOCK(ar);
1617 
1618 	return need_defunct;
1619 }
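
/*
 * Worked example of the deferred-defunct case above: suppose an arena
 * has three mappers, two of which have been redirected (ar_mapcnt 3,
 * ar_maprdrcnt 2).  skmem_arena_mredirect() could not clear ARF_ACTIVE
 * because of the third, non-redirected mapper.  When that mapper is
 * destroyed here, ar_mapcnt drops to 2 == ar_maprdrcnt, so we clear
 * ARF_ACTIVE and return TRUE to let the caller finish the defunct.
 */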
1620 
1621 /*
1622  * Remove arena's memory mapping from task's address space (channel version).
1623  * Will perform a deferred defunct if needed.
1624  */
1625 void
1626 skmem_arena_munmap_channel(struct skmem_arena *ar, struct kern_channel *ch)
1627 {
1628 	SK_LOCK_ASSERT_HELD();
1629 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
1630 
1631 	/*
1632 	 * If this is on a channel that was holding the last
1633 	 * active reference count on the arena, and there are
1634 	 * other defunct channels pointing to that arena, perform the
1635 	 * actual arena defunct now.
1636 	 */
1637 	if (skmem_arena_munmap_common(ar, &ch->ch_mmap)) {
1638 		struct kern_nexus *nx = ch->ch_nexus;
1639 		struct kern_nexus_domain_provider *nxdom_prov = NX_DOM_PROV(nx);
1640 
1641 		/*
1642 		 * Similar to kern_channel_defunct(), where we let the
1643 		 * domain provider complete the defunct.  At this point
1644 		 * both sk_lock and the channel locks are held, and so
1645 		 * we indicate that to the callee.
1646 		 */
1647 		nxdom_prov->nxdom_prov_dom->nxdom_defunct_finalize(nxdom_prov,
1648 		    nx, ch, TRUE);
1649 	}
1650 }
1651 
1652 /*
1653  * Remove arena's memory mapping from task's address space (generic).
1654  * This routine should only be called on non-channel related arenas.
1655  */
1656 void
1657 skmem_arena_munmap(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami)
1658 {
1659 	(void) skmem_arena_munmap_common(ar, ami);
1660 }
1661 
1662 /*
1663  * Redirect eligible memory regions in the task's memory map so that
1664  * they get overwritten and backed with anonymous (zero-filled) pages.
1665  */
1666 int
1667 skmem_arena_mredirect(struct skmem_arena *ar, struct skmem_arena_mmap_info *ami,
1668     struct proc *p, boolean_t *need_defunct)
1669 {
1670 #pragma unused(p)
1671 	int err = 0;
1672 
1673 	*need_defunct = FALSE;
1674 
1675 	AR_LOCK(ar);
1676 	ASSERT(ar->ar_ar != NULL);
1677 	if (ami->ami_redirect) {
1678 		err = EALREADY;
1679 	} else if (ami->ami_mapref == NULL) {
1680 		err = ENXIO;
1681 	} else {
1682 		VERIFY(ar->ar_mapcnt != 0);
1683 		ASSERT(ar->ar_flags & ARF_ACTIVE);
1684 		VERIFY(ami->ami_arena == ar);
1685 		/*
1686 		 * This effectively overwrites the mappings for all
1687 		 * redirectable memory regions (i.e. those without the
1688 		 * SKMEM_REGION_CR_NOREDIRECT flag) while preserving their
1689 		 * protection flags.  Accesses to these regions will be
1690 		 * redirected to anonymous, zero-filled pages.
1691 		 */
1692 		IOSKMapperRedirect(ami->ami_mapref);
1693 		ami->ami_redirect = TRUE;
1694 
1695 		/*
1696 		 * Mark the arena as inactive if all mapper instances are
1697 		 * redirected; otherwise, we do this later during unmap.
1698 		 * Once inactive, the arena will not allow further mmap,
1699 		 * and it is ready to be defunct later.
1700 		 */
1701 		if (++ar->ar_maprdrcnt == ar->ar_mapcnt) {
1702 			ar->ar_flags &= ~ARF_ACTIVE;
1703 			*need_defunct = TRUE;
1704 		}
1705 	}
1706 	AR_UNLOCK(ar);
1707 
1708 	SK_DF(((err != 0) ? SK_VERB_ERROR : SK_VERB_DEFAULT),
1709 	    "%s(%d) \"%s\" ar 0x%llx flags %b inactive %u need_defunct %u "
1710 	    "err %d", sk_proc_name_address(p), sk_proc_pid(p), ar->ar_name,
1711 	    SK_KVA(ar), ar->ar_flags, ARF_BITS, !(ar->ar_flags & ARF_ACTIVE),
1712 	    *need_defunct, err);
1713 
1714 	return err;
1715 }
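
/*
 * Typical caller pattern (sketch): redirect the mapping and, if this
 * was the last active mapper, complete the defunct right away.
 *
 *	boolean_t need_defunct;
 *
 *	if (skmem_arena_mredirect(ar, ami, p, &need_defunct) == 0 &&
 *	    need_defunct) {
 *		// all mappers redirected; arena is no longer active
 *		(void) skmem_arena_defunct(ar);
 *	}
 */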
1716 
1717 /*
1718  * Defunct an arena.
1719  */
1720 int
1721 skmem_arena_defunct(struct skmem_arena *ar)
1722 {
1723 	AR_LOCK(ar);
1724 
1725 	SK_DF(SK_VERB_MEM_ARENA, "\"%s\" ar 0x%llx flags 0x%b", ar->ar_name,
1726 	    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1727 
1728 	if (ar->ar_flags & ARF_DEFUNCT) {
1729 		AR_UNLOCK(ar);
1730 		return EALREADY;
1731 	} else if (ar->ar_flags & ARF_ACTIVE) {
1732 		AR_UNLOCK(ar);
1733 		return EBUSY;
1734 	}
1735 
1736 	/* purge the caches now */
1737 	skmem_arena_reap_locked(ar, TRUE);
1738 
1739 	/* teardown eligible memory regions and associated resources */
1740 	skmem_arena_teardown(ar, TRUE);
1741 
1742 	ar->ar_flags |= ARF_DEFUNCT;
1743 
1744 	AR_UNLOCK(ar);
1745 
1746 	return 0;
1747 }
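
/*
 * Illustrative state progression leading to here:
 *
 *	ARF_ACTIVE ── all mappers redirected ──> (inactive)
 *	(inactive) ── skmem_arena_defunct() ───> ARF_DEFUNCT
 *
 * Calling this routine on an arena that is still active fails with
 * EBUSY; calling it on one that is already defunct fails with EALREADY.
 */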
1748 
1749 /*
1750  * Retrieve total and in-use memory statistics of regions in the arena.
1751  */
1752 void
1753 skmem_arena_get_stats(struct skmem_arena *ar, uint64_t *mem_total,
1754     uint64_t *mem_inuse)
1755 {
1756 	uint32_t i;
1757 
1758 	if (mem_total != NULL) {
1759 		*mem_total = 0;
1760 	}
1761 	if (mem_inuse != NULL) {
1762 		*mem_inuse = 0;
1763 	}
1764 
1765 	AR_LOCK(ar);
1766 	for (i = 0; i < SKMEM_REGIONS; i++) {
1767 		if (ar->ar_regions[i] == NULL) {
1768 			continue;
1769 		}
1770 
1771 		if (mem_total != NULL) {
1772 			*mem_total += AR_MEM_TOTAL(ar, i);
1773 		}
1774 		if (mem_inuse != NULL) {
1775 			*mem_inuse += AR_MEM_INUSE(ar, i);
1776 		}
1777 	}
1778 	AR_UNLOCK(ar);
1779 }
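
/*
 * Usage sketch: either output pointer may be NULL when only one of
 * the two counters is of interest, e.g.
 *
 *	uint64_t total, inuse;
 *
 *	skmem_arena_get_stats(ar, &total, &inuse);	// both counters
 *	skmem_arena_get_stats(ar, NULL, &inuse);	// in-use only
 */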
1780 
1781 /*
1782  * Retrieve the offset of a particular region (identified by its ID)
1783  * from the base of the arena.
1784  */
1785 mach_vm_offset_t
1786 skmem_arena_get_region_offset(struct skmem_arena *ar, skmem_region_id_t id)
1787 {
1788 	mach_vm_offset_t offset = 0;
1789 	uint32_t i;
1790 
1791 	ASSERT(id < SKMEM_REGIONS);
1792 
1793 	AR_LOCK(ar);
1794 	for (i = 0; i < id; i++) {
1795 		if (ar->ar_regions[i] == NULL) {
1796 			continue;
1797 		}
1798 
1799 		offset += ar->ar_regions[i]->skr_size;
1800 	}
1801 	AR_UNLOCK(ar);
1802 
1803 	return offset;
1804 }
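
/*
 * Worked example: the offset of region `id' is the sum of the sizes
 * of all populated regions that precede it.  With hypothetical sizes,
 * if regions 0 (0x4000 bytes) and 2 (0x8000 bytes) are present and
 * region 1 is absent, the offset of region 3 is
 * 0x4000 + 0x8000 = 0xc000; absent regions contribute nothing.
 */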
1805 
1806 static void
1807 skmem_reap_pbufpool_caches(struct kern_pbufpool *pp, boolean_t purge)
1808 {
1809 	if (pp->pp_kmd_cache != NULL) {
1810 		skmem_cache_reap_now(pp->pp_kmd_cache, purge);
1811 	}
1812 	if (PP_BUF_CACHE_DEF(pp) != NULL) {
1813 		skmem_cache_reap_now(PP_BUF_CACHE_DEF(pp), purge);
1814 	}
1815 	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1816 		skmem_cache_reap_now(PP_BUF_CACHE_LARGE(pp), purge);
1817 	}
1818 	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1819 		skmem_cache_reap_now(PP_KBFT_CACHE_DEF(pp), purge);
1820 	}
1821 	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1822 		skmem_cache_reap_now(PP_KBFT_CACHE_LARGE(pp), purge);
1823 	}
1824 }
1825 
1826 /*
1827  * Reap all of the configured caches in the arena, so that any excess amount
1828  * outside of their working sets gets released to their respective backing
1829  * regions.  If purging is specified, we empty the caches' working sets,
1830  * including everything that's cached at the CPU layer.
1831  */
1832 static void
1833 skmem_arena_reap_locked(struct skmem_arena *ar, boolean_t purge)
1834 {
1835 	struct skmem_arena_nexus *arn;
1836 	struct skmem_arena_necp *arc;
1837 	struct kern_pbufpool *pp;
1838 
1839 	AR_LOCK_ASSERT_HELD(ar);
1840 
1841 	switch (ar->ar_type) {
1842 	case SKMEM_ARENA_TYPE_NEXUS:
1843 		arn = (struct skmem_arena_nexus *)ar;
1844 		if (arn->arn_schema_cache != NULL) {
1845 			skmem_cache_reap_now(arn->arn_schema_cache, purge);
1846 		}
1847 		if (arn->arn_ring_cache != NULL) {
1848 			skmem_cache_reap_now(arn->arn_ring_cache, purge);
1849 		}
1850 		if ((pp = arn->arn_rx_pp) != NULL) {
1851 			skmem_reap_pbufpool_caches(pp, purge);
1852 		}
1853 		if ((pp = arn->arn_tx_pp) != NULL && pp != arn->arn_rx_pp) {
1854 			skmem_reap_pbufpool_caches(pp, purge);
1855 		}
1856 		break;
1857 
1858 	case SKMEM_ARENA_TYPE_NECP:
1859 		arc = (struct skmem_arena_necp *)ar;
1860 		if (arc->arc_kstats_cache != NULL) {
1861 			skmem_cache_reap_now(arc->arc_kstats_cache, purge);
1862 		}
1863 		break;
1864 
1865 	case SKMEM_ARENA_TYPE_SYSTEM:
1866 		break;
1867 	}
1868 }
1869 
1870 void
1871 skmem_arena_reap(struct skmem_arena *ar, boolean_t purge)
1872 {
1873 	AR_LOCK(ar);
1874 	skmem_arena_reap_locked(ar, purge);
1875 	AR_UNLOCK(ar);
1876 }
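
/*
 * Illustrative call sites: a memory-pressure path would typically use
 * skmem_arena_reap(ar, FALSE) to trim excess beyond the working sets,
 * whereas the defunct path above uses the purge variant to empty the
 * caches entirely, including everything cached at the CPU layer.
 */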
1877 
1878 #if SK_LOG
1879 SK_LOG_ATTRIBUTE
1880 static void
1881 skmem_arena_create_region_log(struct skmem_arena *ar)
1882 {
1883 	char label[32];
1884 	int i;
1885 
1886 	switch (ar->ar_type) {
1887 	case SKMEM_ARENA_TYPE_NEXUS:
1888 		SK_D("\"%s\" ar 0x%llx flags %b rx_pp 0x%llx tx_pp 0x%llx",
1889 		    ar->ar_name, SK_KVA(ar), ar->ar_flags, ARF_BITS,
1890 		    SK_KVA(skmem_arena_nexus(ar)->arn_rx_pp),
1891 		    SK_KVA(skmem_arena_nexus(ar)->arn_tx_pp));
1892 		break;
1893 
1894 	case SKMEM_ARENA_TYPE_NECP:
1895 	case SKMEM_ARENA_TYPE_SYSTEM:
1896 		SK_D("\"%s\" ar 0x%llx flags %b", ar->ar_name,
1897 		    SK_KVA(ar), ar->ar_flags, ARF_BITS);
1898 		break;
1899 	}
1900 
1901 	for (i = 0; i < SKMEM_REGIONS; i++) {
1902 		if (ar->ar_regions[i] == NULL) {
1903 			continue;
1904 		}
1905 
1906 		(void) snprintf(label, sizeof(label), "REGION_%s:",
1907 		    skmem_region_id2name(i));
1908 		SK_D("  %-16s %6u KB s:[%2u x %6u KB] "
1909 		    "o:[%4u x %6u -> %4u x %6u]", label,
1910 		    (uint32_t)AR_MEM_TOTAL(ar, i) >> 10,
1911 		    (uint32_t)AR_MEM_SEGCNT(ar, i),
1912 		    (uint32_t)AR_MEM_SEGSIZE(ar, i) >> 10,
1913 		    (uint32_t)AR_MEM_OBJCNT_R(ar, i),
1914 		    (uint32_t)AR_MEM_OBJSIZE_R(ar, i),
1915 		    (uint32_t)AR_MEM_OBJCNT_C(ar, i),
1916 		    (uint32_t)AR_MEM_OBJSIZE_C(ar, i));
1917 	}
1918 }
1919 #endif /* SK_LOG */
1920 
1921 static size_t
1922 skmem_arena_mib_get_stats(struct skmem_arena *ar, void *out, size_t len)
1923 {
1924 	size_t actual_space = sizeof(struct sk_stats_arena);
1925 	struct sk_stats_arena *sar = out;
1926 	struct skmem_arena_mmap_info *ami = NULL;
1927 	pid_t proc_pid;
1928 	int i;
1929 
1930 	if (out == NULL || len < actual_space) {
1931 		goto done;
1932 	}
1933 
1934 	AR_LOCK(ar);
1935 	(void) snprintf(sar->sar_name, sizeof(sar->sar_name),
1936 	    "%s", ar->ar_name);
1937 	sar->sar_type = (sk_stats_arena_type_t)ar->ar_type;
1938 	sar->sar_mapsize = (uint64_t)ar->ar_mapsize;
1939 	i = 0;
1940 	SLIST_FOREACH(ami, &ar->ar_map_head, ami_link) {
1941 		if (ami->ami_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS) {
1942 			struct kern_channel *ch;
1943 			ch = container_of(ami, struct kern_channel, ch_mmap);
1944 			proc_pid = ch->ch_pid;
1945 		} else {
1946 			ASSERT((ami->ami_arena->ar_type ==
1947 			    SKMEM_ARENA_TYPE_NECP) ||
1948 			    (ami->ami_arena->ar_type ==
1949 			    SKMEM_ARENA_TYPE_SYSTEM));
1950 			proc_pid =
1951 			    necp_client_get_proc_pid_from_arena_info(ami);
1952 		}
1953 		sar->sar_mapped_pids[i++] = proc_pid;
1954 		if (i >= SK_STATS_ARENA_MAPPED_PID_MAX) {
1955 			break;
1956 		}
1957 	}
1958 
1959 	for (i = 0; i < SKMEM_REGIONS; i++) {
1960 		struct skmem_region *skr = ar->ar_regions[i];
1961 		uuid_t *sreg_uuid = &sar->sar_regions_uuid[i];
1962 
1963 		if (skr == NULL) {
1964 			uuid_clear(*sreg_uuid);
1965 			continue;
1966 		}
1967 
1968 		uuid_copy(*sreg_uuid, skr->skr_uuid);
1969 	}
1970 	AR_UNLOCK(ar);
1971 
1972 done:
1973 	return actual_space;
1974 }
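
/*
 * Note the sizing contract: the routine always returns the space a
 * record requires; it copies data out only when `out' is non-NULL and
 * `len' is large enough.  The sysctl handler below relies on this to
 * compute the total required size on a NULL-buffer pass.
 */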
1975 
1976 static int
1977 skmem_arena_mib_get_sysctl SYSCTL_HANDLER_ARGS
1978 {
1979 #pragma unused(arg1, arg2, oidp)
1980 	struct skmem_arena *ar;
1981 	size_t actual_space;
1982 	size_t buffer_space;
1983 	size_t allocated_space;
1984 	caddr_t buffer = NULL;
1985 	caddr_t scan;
1986 	int error = 0;
1987 
1988 	if (!kauth_cred_issuser(kauth_cred_get())) {
1989 		return EPERM;
1990 	}
1991 
1992 	net_update_uptime();
1993 	buffer_space = req->oldlen;
1994 	if (req->oldptr != USER_ADDR_NULL && buffer_space != 0) {
1995 		if (buffer_space > SK_SYSCTL_ALLOC_MAX) {
1996 			buffer_space = SK_SYSCTL_ALLOC_MAX;
1997 		}
1998 		allocated_space = buffer_space;
1999 		buffer = sk_alloc_data(allocated_space, Z_WAITOK, skmem_tag_arena_mib);
2000 		if (__improbable(buffer == NULL)) {
2001 			return ENOBUFS;
2002 		}
2003 	} else if (req->oldptr == USER_ADDR_NULL) {
2004 		buffer_space = 0;
2005 	}
2006 	actual_space = 0;
2007 	scan = buffer;
2008 
2009 	SKMEM_ARENA_LOCK();
2010 	TAILQ_FOREACH(ar, &skmem_arena_head, ar_link) {
2011 		size_t size = skmem_arena_mib_get_stats(ar, scan, buffer_space);
2012 		if (scan != NULL) {
2013 			if (buffer_space < size) {
2014 				/* supplied buffer too small, stop copying */
2015 				error = ENOMEM;
2016 				break;
2017 			}
2018 			scan += size;
2019 			buffer_space -= size;
2020 		}
2021 		actual_space += size;
2022 	}
2023 	SKMEM_ARENA_UNLOCK();
2024 
2025 	if (actual_space != 0) {
2026 		int out_error = SYSCTL_OUT(req, buffer, actual_space);
2027 		if (out_error != 0) {
2028 			error = out_error;
2029 		}
2030 	}
2031 	if (buffer != NULL) {
2032 		sk_free_data(buffer, allocated_space);
2033 	}
2034 
2035 	return error;
2036 }
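
/*
 * Userspace consumption sketch (hypothetical; the actual MIB name is
 * established by the SYSCTL_PROC registration elsewhere in skywalk,
 * shown here as "kern.skywalk.stats.arena" for illustration):
 *
 *	size_t len = 0;
 *
 *	if (sysctlbyname("kern.skywalk.stats.arena", NULL, &len,
 *	    NULL, 0) == 0 && len != 0) {
 *		struct sk_stats_arena *buf = malloc(len);
 *		if (buf != NULL && sysctlbyname("kern.skywalk.stats.arena",
 *		    buf, &len, NULL, 0) == 0) {
 *			size_t n = len / sizeof(*buf);
 *			// each of the n records describes one arena
 *		}
 *		free(buf);
 *	}
 */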
2037