xref: /xnu-12377.41.6/libsyscall/mach/vm_reclaim.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
/*
 * Copyright (c) 2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#if defined(__LP64__)
/*
 * Userspace functions for manipulating the reclaim buffer.
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <mach/error.h>
#include <mach/kern_return.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <mach/mach_traps.h>
#include <mach/mach_vm.h>
#include <mach/vm_reclaim_private.h>
#undef _mach_vm_user_
#include <mach/mach_vm_internal.h>
#include <mach/vm_map.h>
#include <mach/vm_page_size.h>
#include <os/atomic_private.h>
#include <os/overflow.h>
#include <sys/param.h>
#include <TargetConditionals.h>

#pragma mark Utilities
#define _assert(__op, __condition, __cause) \
	do { \
	        if (!(__condition)) { \
	                __builtin_trap(); \
	        } \
	} while (false)
#define _abort(__op, __cause) \
	do { \
	        __builtin_trap(); \
	} while (false)

_Static_assert(VM_RECLAIM_MAX_CAPACITY <= UINT32_MAX, "Max capacity must fit in mach_vm_reclaim_count_t");

static inline struct mach_vm_reclaim_entry_s
construct_entry(
	mach_vm_address_t start_addr,
	uint32_t size,
	mach_vm_reclaim_action_t behavior)
{
	struct mach_vm_reclaim_entry_s entry = {0ULL};
	entry.address = start_addr;
	entry.size = size;
	entry.behavior = behavior;
	return entry;
}

static uint64_t
max_buffer_len_for_size(mach_vm_size_t size)
{
	mach_vm_size_t entries_size = size - offsetof(struct mach_vm_reclaim_ring_s, entries);
	return entries_size / sizeof(struct mach_vm_reclaim_entry_s);
}

static mach_vm_reclaim_count_t
round_buffer_len(mach_vm_reclaim_count_t count)
{
	mach_vm_reclaim_count_t rounded_count;
	mach_vm_size_t buffer_size =
	    offsetof(struct mach_vm_reclaim_ring_s, entries) +
	    (count * sizeof(struct mach_vm_reclaim_entry_s));
	mach_vm_size_t rounded_size = mach_vm_round_page(buffer_size);
	uint64_t num_entries = max_buffer_len_for_size(rounded_size);
	if (os_convert_overflow(num_entries, &rounded_count)) {
		return UINT32_MAX;
	}
	return rounded_count;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_ring_allocate(
	mach_vm_reclaim_ring_t *ring_out,
	mach_vm_reclaim_count_t initial_capacity,
	mach_vm_reclaim_count_t max_capacity)
{
	kern_return_t kr;
	mach_vm_address_t vm_addr = 0;
	uint64_t sampling_period_abs;

	if (ring_out == NULL || max_capacity < initial_capacity ||
	    initial_capacity == 0 || max_capacity == 0) {
		return VM_RECLAIM_INVALID_ARGUMENT;
	}
	if (max_capacity > VM_RECLAIM_MAX_CAPACITY) {
		return VM_RECLAIM_INVALID_CAPACITY;
	}

	*ring_out = NULL;
	kr = mach_vm_deferred_reclamation_buffer_allocate(mach_task_self(),
	    &vm_addr, &sampling_period_abs, initial_capacity, max_capacity);
	if (kr == ERR_SUCCESS) {
		mach_vm_reclaim_ring_t ring =
		    (mach_vm_reclaim_ring_t)vm_addr;
		ring->last_sample_abs = mach_absolute_time();
		ring->reclaimable_bytes = 0;
		ring->reclaimable_bytes_min = 0;
		ring->len = initial_capacity;
		ring->max_len = max_capacity;
		ring->sampling_period_abs = sampling_period_abs;
		*ring_out = ring;
	}
	return kr;
}

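/*
 * Usage sketch for mach_vm_reclaim_ring_allocate (illustrative only;
 * the requested capacity of 1024 is hypothetical and error handling
 * is elided):
 *
 *	mach_vm_reclaim_ring_t ring;
 *	mach_vm_reclaim_count_t cap = mach_vm_reclaim_round_capacity(1024);
 *	mach_vm_reclaim_error_t err =
 *	    mach_vm_reclaim_ring_allocate(&ring, cap, cap);
 *	if (err != VM_RECLAIM_SUCCESS) {
 *	        // fall back to freeing memory eagerly
 *	}
 */
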
mach_vm_reclaim_error_t
mach_vm_reclaim_ring_resize(
	mach_vm_reclaim_ring_t ring,
	mach_vm_reclaim_count_t capacity)
{
	mach_error_t err;
	mach_vm_size_t bytes_reclaimed = 0;

	if (ring == NULL) {
		return VM_RECLAIM_INVALID_RING;
	}
	if (capacity == 0 || capacity > ring->max_len) {
		return VM_RECLAIM_INVALID_CAPACITY;
	}

	err = mach_vm_deferred_reclamation_buffer_resize(mach_task_self(),
	    capacity, &bytes_reclaimed);
	if (err == ERR_SUCCESS) {
		ring->len = capacity;
		/* Reset the accounting now that we've flushed the buffer */
		ring->last_sample_abs = mach_absolute_time();
	}
	size_t reclaimable_bytes = os_atomic_sub(&ring->reclaimable_bytes, bytes_reclaimed, relaxed);
	os_atomic_min(&ring->reclaimable_bytes_min, reclaimable_bytes, relaxed);
	return err;
}

mach_vm_reclaim_count_t
mach_vm_reclaim_round_capacity(
	mach_vm_reclaim_count_t count)
{
	if (count > VM_RECLAIM_MAX_CAPACITY) {
		return VM_RECLAIM_MAX_CAPACITY;
	}
	return round_buffer_len(count);
}

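/*
 * Worked example for mach_vm_reclaim_round_capacity (illustrative;
 * assumes a 16 KiB page size): a requested capacity of 100 entries
 * needs offsetof(struct mach_vm_reclaim_ring_s, entries) +
 * 100 * sizeof(struct mach_vm_reclaim_entry_s) bytes, which rounds up
 * to a single 16 KiB page. The returned capacity is the larger number
 * of entries that fits in the rounded buffer, so none of the
 * page-rounded space is wasted.
 */
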
mach_vm_reclaim_error_t
mach_vm_reclaim_try_enter(
	mach_vm_reclaim_ring_t ring,
	mach_vm_address_t region_start,
	mach_vm_size_t region_size,
	mach_vm_reclaim_action_t action,
	mach_vm_reclaim_id_t *id,
	bool *should_update_kernel_accounting)
{
	mach_vm_reclaim_id_t tail = 0, head = 0, original_tail = 0, busy = 0;

	if (ring == NULL) {
		return VM_RECLAIM_INVALID_RING;
	}
	if (id == NULL) {
		return VM_RECLAIM_INVALID_ID;
	}

	/* Only dereference the ring after it has been validated */
	mach_vm_reclaim_entry_t entries = ring->entries;
	uint64_t buffer_len = (uint64_t)ring->len;
	*should_update_kernel_accounting = false;

	uint32_t size32;
	if (os_convert_overflow(region_size, &size32)) {
		/* regions must fit in 32 bits */
		*id = VM_RECLAIM_ID_NULL;
		return VM_RECLAIM_INVALID_REGION_SIZE;
	}

	mach_vm_reclaim_id_t requested_id = *id;
	*id = VM_RECLAIM_ID_NULL;

	if (requested_id == VM_RECLAIM_ID_NULL) {
		tail = os_atomic_load_wide(&ring->tail, relaxed);
		head = os_atomic_load_wide(&ring->head, relaxed);

		if (tail % buffer_len == head % buffer_len && tail > head) {
			/* Buffer is full */
			return VM_RECLAIM_SUCCESS;
		}

		/*
		 * idx must be >= head, and the buffer is not full, so it is
		 * not possible for the kernel to be acting on the entry at
		 * (tail + 1) % size. Thus we don't need to check the busy
		 * pointer here.
		 */
		struct mach_vm_reclaim_entry_s entry = construct_entry(region_start, size32, action);
		entries[tail % buffer_len] = entry;
		os_atomic_thread_fence(seq_cst); // tail increment must not be visible before the entry is written to the buffer
		os_atomic_inc(&ring->tail, relaxed);
		*id = tail;
	} else {
		head = os_atomic_load_wide(&ring->head, relaxed);
		if (requested_id < head) {
			/*
			 * This is just a fast path for the case where the buffer has wrapped.
			 * It's not strictly necessary because idx must also be < busy.
			 * That's why we can use a relaxed load for the head ptr.
			 */
			return VM_RECLAIM_SUCCESS;
		}
		/* Attempt to move tail to idx */
		original_tail = os_atomic_load_wide(&ring->tail, relaxed);
		_assert("mach_vm_reclaim_mark_free_with_id",
		    requested_id < original_tail, original_tail);

		os_atomic_store_wide(&ring->tail, requested_id, relaxed);
		os_atomic_thread_fence(seq_cst); // Our write to tail must happen before our read of busy
		busy = os_atomic_load_wide(&ring->busy, relaxed);
		if (requested_id < busy) {
			/* Kernel is acting on this entry. Undo. */
			os_atomic_store_wide(&ring->tail, original_tail, relaxed);
			return VM_RECLAIM_SUCCESS;
		}

		mach_vm_reclaim_entry_t entry = &entries[requested_id % buffer_len];
		_assert("mach_vm_reclaim_try_enter",
		    entry->address == 0 && entry->size == 0, entry->address);

		/* Successfully moved tail back. Can now overwrite the entry */
		*entry = construct_entry(region_start, size32, action);

		/* The restored tail must not be visible before the entry is written to the buffer */
		os_atomic_thread_fence(seq_cst);
		/* Reset tail. */
		os_atomic_store_wide(&ring->tail, original_tail, relaxed);
		*id = requested_id;
	}

	size_t reclaimable_bytes = os_atomic_add(&ring->reclaimable_bytes, region_size, relaxed);
	os_atomic_min(&ring->reclaimable_bytes_min, reclaimable_bytes, relaxed);

	uint64_t now = mach_absolute_time();
	if (now - ring->last_sample_abs >= ring->sampling_period_abs) {
		*should_update_kernel_accounting = true;
	}
	return VM_RECLAIM_SUCCESS;
}

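/*
 * Usage sketch for mach_vm_reclaim_try_enter (illustrative; addr and
 * size are a hypothetical region and error handling is elided). A
 * caller defers the free of a region it may want to reuse later:
 *
 *	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
 *	bool update_accounting = false;
 *	err = mach_vm_reclaim_try_enter(ring, addr, size,
 *	    VM_RECLAIM_FREE, &id, &update_accounting);
 *	if (err == VM_RECLAIM_SUCCESS && id == VM_RECLAIM_ID_NULL) {
 *	        // ring was full; free the region synchronously instead
 *	}
 *	if (update_accounting) {
 *	        mach_vm_reclaim_update_kernel_accounting(ring);
 *	}
 */
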
mach_vm_reclaim_error_t
mach_vm_reclaim_try_cancel(
	mach_vm_reclaim_ring_t ring,
	mach_vm_reclaim_id_t id,
	mach_vm_address_t region_start,
	mach_vm_size_t region_size,
	mach_vm_reclaim_action_t behavior,
	mach_vm_reclaim_state_t *state,
	bool *should_update_kernel_accounting)
{
	uint64_t head = 0, busy = 0, original_tail = 0;

	if (ring == NULL) {
		return VM_RECLAIM_INVALID_RING;
	}
	if (id == VM_RECLAIM_ID_NULL) {
		/* The entry was never put in the reclaim ring buffer */
		return VM_RECLAIM_INVALID_ID;
	}
	if (state == NULL || should_update_kernel_accounting == NULL) {
		return VM_RECLAIM_INVALID_ARGUMENT;
	}

	/* Only dereference the ring after it has been validated */
	mach_vm_reclaim_entry_t entries = ring->entries;
	uint64_t buffer_len = (uint64_t)ring->len;

	*should_update_kernel_accounting = false;

	uint32_t size32;
	if (os_convert_overflow(region_size, &size32)) {
		/* Regions must fit in 32 bits */
		return VM_RECLAIM_INVALID_REGION_SIZE;
	}

	head = os_atomic_load_wide(&ring->head, relaxed);
	if (id < head) {
		/*
		 * This is just a fast path for the case where the buffer has wrapped.
		 * It's not strictly necessary because idx must also be < busy.
		 * That's why we can use a relaxed load for the head ptr.
		 */
		switch (behavior) {
		case VM_RECLAIM_DEALLOCATE:
			/* Entry has been deallocated and is not safe to re-use */
			*state = VM_RECLAIM_DEALLOCATED;
			break;
		case VM_RECLAIM_FREE:
			/* Entry has been freed, the virtual region is now safe to re-use */
			*state = VM_RECLAIM_FREED;
			break;
		default:
			return VM_RECLAIM_INVALID_ARGUMENT;
		}
		return VM_RECLAIM_SUCCESS;
	}

	/* Attempt to move tail to idx */
	original_tail = os_atomic_load_wide(&ring->tail, relaxed);
	_assert("mach_vm_reclaim_mark_used", id < original_tail, original_tail);

	os_atomic_store_wide(&ring->tail, id, relaxed);
	/* Our write to tail must happen before our read of busy */
	os_atomic_thread_fence(seq_cst);
	busy = os_atomic_load_wide(&ring->busy, relaxed);
	if (id < busy) {
		/*
		 * This entry is in the process of being reclaimed. It is
		 * never safe to re-use while in this state.
		 */
		os_atomic_store_wide(&ring->tail, original_tail, relaxed);
		*state = VM_RECLAIM_BUSY;
		return VM_RECLAIM_SUCCESS;
	}
	mach_vm_reclaim_entry_t entry = &entries[id % buffer_len];
	_assert("mach_vm_reclaim_mark_used", entry->size == region_size, entry->size);
	_assert("mach_vm_reclaim_mark_used", entry->address == region_start, entry->address);
	_assert("mach_vm_reclaim_mark_used", entry->behavior == behavior, entry->behavior);

	/* Successfully moved tail back. Can now overwrite the entry */
	memset(entry, 0, sizeof(struct mach_vm_reclaim_entry_s));
	/* The tail restore must not be visible before the entry is cleared in the buffer */
	os_atomic_thread_fence(seq_cst);
	/* Reset tail. */
	os_atomic_store_wide(&ring->tail, original_tail, relaxed);

	size_t reclaimable_bytes = os_atomic_sub(&ring->reclaimable_bytes, region_size, relaxed);
	os_atomic_min(&ring->reclaimable_bytes_min, reclaimable_bytes, relaxed);

	uint64_t now = mach_absolute_time();
	if (now - ring->last_sample_abs >= ring->sampling_period_abs) {
		*should_update_kernel_accounting = true;
	}
	*state = VM_RECLAIM_UNRECLAIMED;
	return VM_RECLAIM_SUCCESS;
}

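/*
 * Usage sketch for mach_vm_reclaim_try_cancel (illustrative; the
 * arguments must match the earlier mach_vm_reclaim_try_enter call).
 * A caller that wants to reuse a deferred region first tries to pull
 * it back out of the ring:
 *
 *	mach_vm_reclaim_state_t state;
 *	err = mach_vm_reclaim_try_cancel(ring, id, addr, size,
 *	    VM_RECLAIM_FREE, &state, &update_accounting);
 *	if (err == VM_RECLAIM_SUCCESS &&
 *	    mach_vm_reclaim_is_reusable(state)) {
 *	        // safe to reuse the region at addr
 *	} else {
 *	        // busy or already reclaimed; allocate fresh memory
 *	}
 */
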
mach_vm_reclaim_error_t
mach_vm_reclaim_query_state(
	mach_vm_reclaim_ring_t ring,
	mach_vm_reclaim_id_t id,
	mach_vm_reclaim_action_t action,
	mach_vm_reclaim_state_t *state)
{
	if (ring == NULL) {
		return VM_RECLAIM_INVALID_RING;
	}
	if (id == VM_RECLAIM_ID_NULL) {
		return VM_RECLAIM_INVALID_ID;
	}

	mach_vm_reclaim_id_t head = os_atomic_load_wide(&ring->head, relaxed);
	if (id < head) {
		switch (action) {
		case VM_RECLAIM_FREE:
			*state = VM_RECLAIM_FREED;
			break;
		case VM_RECLAIM_DEALLOCATE:
			*state = VM_RECLAIM_DEALLOCATED;
			break;
		default:
			return VM_RECLAIM_INVALID_ARGUMENT;
		}
		return VM_RECLAIM_SUCCESS;
	}

	mach_vm_reclaim_id_t busy = os_atomic_load_wide(&ring->busy, relaxed);
	if (id < busy) {
		*state = VM_RECLAIM_BUSY;
	} else {
		*state = VM_RECLAIM_UNRECLAIMED;
	}
	return VM_RECLAIM_SUCCESS;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_update_kernel_accounting(const mach_vm_reclaim_ring_t ring)
{
	mach_error_t err;
	uint64_t bytes_reclaimed = 0;
	uint64_t now, last_sample;

	os_atomic_rmw_loop(&ring->last_sample_abs, last_sample, now, relaxed, {
		now = mach_absolute_time();
		if (now - last_sample < ring->sampling_period_abs) {
		        os_atomic_rmw_loop_give_up(return VM_RECLAIM_SUCCESS; );
		}
	});
	err = mach_vm_reclaim_update_kernel_accounting_trap(current_task(),
	    &bytes_reclaimed);
	size_t reclaimable_bytes = os_atomic_sub(&ring->reclaimable_bytes, bytes_reclaimed, relaxed);
	os_atomic_min(&ring->reclaimable_bytes_min, reclaimable_bytes, relaxed);
	return err;
}

bool
mach_vm_reclaim_is_reusable(
	mach_vm_reclaim_state_t state)
{
	return state == VM_RECLAIM_FREED || state == VM_RECLAIM_UNRECLAIMED;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_ring_capacity(mach_vm_reclaim_ring_t ring, mach_vm_reclaim_count_t *capacity)
{
	if (ring == NULL) {
		return VM_RECLAIM_INVALID_RING;
	}
	if (capacity == NULL) {
		return VM_RECLAIM_INVALID_ARGUMENT;
	}
	*capacity = ring->len;
	return VM_RECLAIM_SUCCESS;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_ring_flush(
	mach_vm_reclaim_ring_t ring,
	mach_vm_reclaim_count_t num_entries_to_reclaim)
{
	mach_vm_size_t bytes_reclaimed;
	mach_error_t err;
	if (ring == NULL) {
		return VM_RECLAIM_INVALID_RING;
	}
	if (num_entries_to_reclaim == 0) {
		return VM_RECLAIM_INVALID_ARGUMENT;
	}

	err = mach_vm_deferred_reclamation_buffer_flush(mach_task_self(),
	    num_entries_to_reclaim, &bytes_reclaimed);
	if (err == ERR_SUCCESS) {
		size_t reclaimable_bytes = os_atomic_sub(&ring->reclaimable_bytes, bytes_reclaimed, relaxed);
		os_atomic_min(&ring->reclaimable_bytes_min, reclaimable_bytes, release);
	}
	return err;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_get_rings_for_task(
	task_read_t task,
	mach_vm_reclaim_ring_ref_t refs_out,
	mach_vm_reclaim_count_t *count_inout)
{
	/*
	 * Technically, we could support multiple rings per task, but for now
	 * there is only one. This shim fakes the multi-ring interface at the
	 * libsyscall layer so that callers won't need to change if that ever
	 * does.
	 */

	kern_return_t kr;
	mach_vm_address_t addr;
	mach_vm_size_t size;

	if (count_inout == NULL) {
		return VM_RECLAIM_INVALID_ARGUMENT;
	}

	kr = mach_vm_deferred_reclamation_buffer_query(task, &addr, &size);

	if (kr != KERN_SUCCESS) {
		switch (kr) {
		case KERN_NOT_SUPPORTED:
			return VM_RECLAIM_NOT_SUPPORTED;
		case KERN_INVALID_ARGUMENT:
		case KERN_INVALID_TASK:
		case KERN_INVALID_ADDRESS:
			return VM_RECLAIM_INVALID_ARGUMENT;
		default:
			return kr;
		}
	}

	/* Size query. If addr == 0, the task doesn't have a ring */
	if (refs_out == NULL) {
		*count_inout = addr ? 1 : 0;
		return KERN_SUCCESS;
	}

	if (addr) {
		if (*count_inout >= 1) {
			refs_out->addr = addr;
			refs_out->size = size;
		}
		*count_inout = 1;
	} else {
		*count_inout = 0;
	}

	return KERN_SUCCESS;
}

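/*
 * Usage sketch for mach_vm_reclaim_get_rings_for_task (illustrative;
 * the struct tag for the ref follows the _s naming convention used by
 * the other reclaim types, which is an assumption here). Pass a NULL
 * refs_out first to learn how many rings the task has, then call
 * again with storage for that many refs:
 *
 *	mach_vm_reclaim_count_t count = 0;
 *	err = mach_vm_reclaim_get_rings_for_task(task, NULL, &count);
 *	if (err == VM_RECLAIM_SUCCESS && count > 0) {
 *	        struct mach_vm_reclaim_ring_ref_s ref;
 *	        count = 1;
 *	        err = mach_vm_reclaim_get_rings_for_task(task, &ref,
 *	            &count);
 *	}
 */
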
static mach_vm_reclaim_error_t
verify_ring_allocation_size(mach_vm_address_t addr, mach_vm_size_t size)
{
	if (size < offsetof(struct mach_vm_reclaim_ring_s, entries)) {
		return VM_RECLAIM_INVALID_RING;
	}

	mach_vm_reclaim_ring_t ring = (mach_vm_reclaim_ring_t) addr;
	mach_vm_size_t supposed_size =
	    offsetof(struct mach_vm_reclaim_ring_s, entries) +
	    (ring->max_len * sizeof(struct mach_vm_reclaim_entry_s));

	/* store allocation size in ring->_unused so that we can free it later */
	ring->_unused = size;

	return (supposed_size <= size) ? VM_RECLAIM_SUCCESS : VM_RECLAIM_INVALID_RING;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_ring_copy(
	task_read_t task,
	mach_vm_reclaim_ring_ref_t ref,
	mach_vm_reclaim_ring_copy_t *ring_out)
{
	mach_vm_address_t address = 0;
	vm_prot_t curprot = VM_PROT_DEFAULT;
	vm_prot_t maxprot = VM_PROT_DEFAULT;
	kern_return_t kr = mach_vm_remap(
		mach_task_self(),
		&address,
		ref->size,
		0,
		VM_FLAGS_ANYWHERE,
		task,
		ref->addr,
		TRUE,
		&curprot,
		&maxprot,
		VM_INHERIT_DEFAULT);

	switch (kr) {
	case KERN_INVALID_TASK:
	case KERN_INVALID_ADDRESS:
	case KERN_INVALID_ARGUMENT:
		return VM_RECLAIM_INVALID_ARGUMENT;
	case KERN_SUCCESS:
		break;
	default:
		return kr;
	}

	kr = verify_ring_allocation_size(address, ref->size);
	if (kr != VM_RECLAIM_SUCCESS) {
		/* Don't leak the remapped buffer if it fails validation */
		mach_vm_deallocate(mach_task_self(), address, ref->size);
		return kr;
	}

	*ring_out = address;
	return VM_RECLAIM_SUCCESS;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_copied_ring_free(
	mach_vm_reclaim_ring_copy_t *cring)
{
	kern_return_t kr;
	mach_vm_reclaim_ring_t ring = (mach_vm_reclaim_ring_t) *cring;

	kr = mach_vm_deallocate(
		mach_task_self(),
		(mach_vm_address_t) *cring,
		ring->_unused);

	if (kr == KERN_SUCCESS) {
		*cring = NULL;
	}

	return kr;
}

mach_vm_reclaim_error_t
mach_vm_reclaim_copied_ring_query(
	mach_vm_reclaim_ring_copy_t *ring_copy,
	mach_vm_reclaim_region_t regions_out,
	mach_vm_reclaim_count_t *count_inout)
{
	mach_vm_reclaim_id_t head, tail, idx, entry_idx;
	mach_vm_reclaim_entry_t entry;
	mach_vm_reclaim_count_t count;
	mach_vm_reclaim_ring_t ring = (mach_vm_reclaim_ring_t) *ring_copy;

	if (ring == NULL) {
		return VM_RECLAIM_INVALID_RING;
	}

	if (count_inout == NULL) {
		return VM_RECLAIM_INVALID_ARGUMENT;
	}

	head = os_atomic_load_wide(&ring->head, relaxed);
	tail = os_atomic_load_wide(&ring->tail, relaxed);

	if (tail < head) {
		*count_inout = 0;
		return VM_RECLAIM_SUCCESS;
	}

	count = (mach_vm_reclaim_count_t) (tail - head);

	/* Query size */
	if (regions_out == NULL) {
		*count_inout = count;
		return VM_RECLAIM_SUCCESS;
	}

	count = (count < *count_inout) ? count : *count_inout;

	for (idx = 0; idx < count; idx++) {
		entry_idx = (head + idx) % ring->len;
		if (entry_idx >= ring->max_len) {
			/*
			 * Make sure we don't accidentally read outside of the mapped region
			 * due to a malformed ring; only indices below max_len are backed by
			 * verified storage.
			 */
			*count_inout = (mach_vm_reclaim_count_t) idx;
			return VM_RECLAIM_INVALID_CAPACITY;
		}
		entry = &ring->entries[entry_idx];
		regions_out->vmrr_addr = entry->address;
		regions_out->vmrr_size = entry->size;
		regions_out->vmrr_behavior = entry->behavior;
		regions_out++;
	}

	*count_inout = count;

	return VM_RECLAIM_SUCCESS;
}

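/*
 * Usage sketch: inspecting another task's ring end to end
 * (illustrative; error handling elided, and ref is assumed to come
 * from mach_vm_reclaim_get_rings_for_task as sketched above):
 *
 *	mach_vm_reclaim_ring_copy_t copy;
 *	mach_vm_reclaim_count_t n = 0;
 *	err = mach_vm_reclaim_ring_copy(task, &ref, &copy);
 *	mach_vm_reclaim_copied_ring_query(&copy, NULL, &n);
 *	// allocate n region records, then query again to fill them
 *	mach_vm_reclaim_copied_ring_free(&copy);
 */
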
#endif /* defined(__LP64__) */