/*
 * Copyright (c) 2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/exc_guard.h>
#include <kern/locks.h>
#include <kern/task.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/startup.h>
#include <kern/sched.h>
#include <libkern/OSAtomic.h>
#include <mach/kern_return.h>
#include <mach/mach_types.h>
#include <mach/mach_vm.h>
#include <mach/vm_reclaim.h>
#include <os/log.h>
#include <pexpert/pexpert.h>
#include <vm/vm_map.h>
#include <vm/vm_map_internal.h>
#include <vm/vm_reclaim_internal.h>
#include <sys/queue.h>
#include <os/atomic_private.h>
#pragma mark Tunables
TUNABLE(uint32_t, kReclaimChunkSize, "vm_reclaim_chunk_size", 16);
static integer_t kReclaimThreadPriority = BASEPRI_VM;
// Reclaim down to vm_reclaim_max_threshold / vm_reclaim_trim_divisor when doing a trim reclaim operation
TUNABLE_DEV_WRITEABLE(uint64_t, vm_reclaim_trim_divisor, "vm_reclaim_trim_divisor", 2);
TUNABLE_DT_DEV_WRITEABLE(uint64_t, vm_reclaim_max_threshold, "/defaults", "kern.vm_reclaim_max_threshold", "vm_reclaim_max_threshold", 0, TUNABLE_DT_NONE);
// Used to debug vm_reclaim kills
TUNABLE(bool, panic_on_kill, "vm_reclaim_panic_on_kill", false);

#pragma mark Declarations
typedef struct proc *proc_t;
extern char *proc_best_name(proc_t proc);
extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code, mach_exception_data_type_t subcode);
struct proc *proc_ref(struct proc *p, int locked);
int proc_rele(proc_t p);
static bool reclaim_copyin_head(vm_deferred_reclamation_metadata_t metadata, uint64_t *head);
static bool reclaim_copyin_tail(vm_deferred_reclamation_metadata_t metadata, uint64_t *tail);
static bool reclaim_copyin_busy(vm_deferred_reclamation_metadata_t metadata, uint64_t *busy);

struct vm_deferred_reclamation_metadata_s {
    TAILQ_ENTRY(vm_deferred_reclamation_metadata_s) vdrm_list; // Global list containing every reclamation buffer
    TAILQ_ENTRY(vm_deferred_reclamation_metadata_s) vdrm_async_list; // A list containing buffers that are ripe for reclamation
    decl_lck_mtx_data(, vdrm_lock); /* Held when reclaiming from the buffer */
    /*
     * The task owns this structure but we maintain a backpointer here
     * so that we can send an exception if we hit an error.
     * Since this is a backpointer we don't hold a reference (it's a weak pointer).
     */
    task_t vdrm_task;
    vm_map_t vdrm_map;
    user_addr_t vdrm_reclaim_buffer;
    mach_vm_size_t vdrm_buffer_size;
    user_addr_t vdrm_reclaim_indices;
    uint64_t vdrm_reclaimed_at;
    /*
     * These two values represent running sums of bytes placed in the buffer and bytes reclaimed out of the buffer
     * cumulatively. Both values are in terms of virtual memory, so they give an upper bound
     * on the amount of physical memory that can be reclaimed.
     * To get an estimate of the current amount of VA in the buffer do vdrm_num_bytes_put_in_buffer - vdrm_num_bytes_reclaimed.
     * Note that neither value is protected by the vdrm_lock.
     */
    _Atomic size_t vdrm_num_bytes_put_in_buffer;
    _Atomic size_t vdrm_num_bytes_reclaimed;
};
static void process_async_reclamation_list(void);

extern void *proc_find(int pid);
extern task_t proc_task(proc_t);

#pragma mark Globals
static KALLOC_TYPE_DEFINE(vm_reclaim_metadata_zone, struct vm_deferred_reclamation_metadata_s, KT_DEFAULT);
static LCK_GRP_DECLARE(vm_reclaim_lock_grp, "vm_reclaim");
static os_log_t vm_reclaim_log_handle;
static size_t kReclaimChunkFailed = UINT64_MAX;

/*
 * The ringbuffer must contain at least 2 entries to distinguish between empty
 * (head == tail) and full (head == tail + 1).
 */
#define BUFFER_MIN_ENTRY_COUNT 2
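
/*
 * Illustrative note (not from the original source): with a single-entry ring
 * the two states above would be indistinguishable, since "empty" and "full"
 * would both present as head == tail. Two entries keep them distinct:
 *
 *     head == tail        -> empty, nothing to reclaim
 *     head == tail + 1    -> full, userspace must wait for the kernel
 */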

/*
 * We maintain two lists of reclamation buffers.
 * The reclamation_buffers list contains every buffer in the system.
 * The async_reclamation_buffers_list contains buffers that are ripe for reclamation.
 * Each list has its own lock.
 */
static TAILQ_HEAD(, vm_deferred_reclamation_metadata_s) reclamation_buffers = TAILQ_HEAD_INITIALIZER(reclamation_buffers);

static TAILQ_HEAD(, vm_deferred_reclamation_metadata_s) async_reclamation_buffers = TAILQ_HEAD_INITIALIZER(async_reclamation_buffers);
/*
 * The reclamation_buffers_lock protects the reclamation_buffers list.
 * It must be held when iterating over the list or manipulating the list.
 * It should be dropped when acting on a specific metadata entry after acquiring the vdrm_lock.
 */
LCK_MTX_DECLARE(reclamation_buffers_lock, &vm_reclaim_lock_grp);
LCK_MTX_DECLARE(async_reclamation_buffers_lock, &vm_reclaim_lock_grp);
static size_t reclamation_buffers_length;
static uint64_t reclamation_counter; // generation count for global reclaims

static SECURITY_READ_ONLY_LATE(thread_t) vm_reclaim_thread;
static void reclaim_thread(void *param __unused, wait_result_t wr __unused);

#pragma mark Implementation

/*
 * The current design is not tolerant to faulting on the buffer under the
 * metadata lock. Wire the buffer as a stop-gap solution for now; in the
 * future, the synchronization scheme should be revised to allow the buffer
 * to be pageable (rdar://112039103).
 */

static kern_return_t
vmdr_metadata_wire(vm_deferred_reclamation_metadata_t metadata)
{
    kern_return_t kr;
    vm_map_offset_t buffer_start = (metadata->vdrm_reclaim_buffer -
        offsetof(struct mach_vm_reclaim_buffer_v1_s, entries));
    vm_map_offset_t buffer_end = (metadata->vdrm_reclaim_buffer +
        metadata->vdrm_buffer_size);
    kr = vm_map_wire_kernel(metadata->vdrm_map, buffer_start, buffer_end,
        VM_PROT_NONE, VM_KERN_MEMORY_OSFMK, TRUE);
    if (kr != KERN_SUCCESS) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: failed to wire userspace reclaim buffer for pid %d (%d)",
            task_pid(metadata->vdrm_task), kr);
    }
    return kr;
}

static kern_return_t
vmdr_metadata_unwire(vm_deferred_reclamation_metadata_t metadata)
{
    kern_return_t kr;
    vm_map_offset_t buffer_start = (metadata->vdrm_reclaim_buffer -
        offsetof(struct mach_vm_reclaim_buffer_v1_s, entries));
    vm_map_offset_t buffer_end = (metadata->vdrm_reclaim_buffer +
        metadata->vdrm_buffer_size);
    kr = vm_map_unwire(metadata->vdrm_map, buffer_start, buffer_end, TRUE);
    if (kr != KERN_SUCCESS) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: unable to un-wire buffer %p (%llu) for pid %d (%d)",
            (void *)buffer_start, (buffer_end - buffer_start),
            task_pid(metadata->vdrm_task), kr);
    }
    return kr;
}

static vm_deferred_reclamation_metadata_t
vmdr_metadata_alloc(
    task_t task,
    user_addr_t buffer,
    mach_vm_size_t size,
    user_addr_t indices)
{
    vm_deferred_reclamation_metadata_t metadata;
    vm_map_t map = task->map;

    assert(!map->is_nested_map);

    metadata = zalloc_flags(vm_reclaim_metadata_zone, Z_WAITOK | Z_ZERO);
    lck_mtx_init(&metadata->vdrm_lock, &vm_reclaim_lock_grp, LCK_ATTR_NULL);
    metadata->vdrm_task = task;
    metadata->vdrm_map = map;
    metadata->vdrm_reclaim_buffer = buffer;
    metadata->vdrm_buffer_size = size;
    metadata->vdrm_reclaim_indices = indices;

    /*
     * we do not need to hold a lock on `task` because this is called
     * either at fork() time or from the context of current_task().
     */
    vm_map_reference(map);
    return metadata;
}

static void
vmdr_metadata_free(vm_deferred_reclamation_metadata_t metadata)
{
    vm_map_deallocate(metadata->vdrm_map);
    lck_mtx_destroy(&metadata->vdrm_lock, &vm_reclaim_lock_grp);
    zfree(vm_reclaim_metadata_zone, metadata);
}

kern_return_t
vm_deferred_reclamation_buffer_init_internal(
    task_t task,
    mach_vm_offset_t address,
    mach_vm_size_t size)
{
    kern_return_t kr = KERN_FAILURE, tmp_kr;
    vm_deferred_reclamation_metadata_t metadata = NULL;
    bool success;
    uint64_t head = 0, tail = 0, busy = 0;

    if (address == 0 ||
        size < (sizeof(struct mach_vm_reclaim_buffer_v1_s) +
        BUFFER_MIN_ENTRY_COUNT * sizeof(mach_vm_reclaim_entry_v1_t)) ||
        !VM_MAP_PAGE_ALIGNED(address, VM_MAP_PAGE_MASK(task->map)) ||
        !VM_MAP_PAGE_ALIGNED((address + size), VM_MAP_PAGE_MASK(task->map))) {
        return KERN_INVALID_ARGUMENT;
    }

    /* vm_reclaim is disabled */
    if (vm_reclaim_max_threshold == 0) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: failed to initialize vmdr buffer - reclaim is disabled (%llu)",
            vm_reclaim_max_threshold);
        return KERN_NOT_SUPPORTED;
    }

    user_addr_t buffer = address +
        offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
    mach_vm_size_t buffer_size = size -
        offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
    user_addr_t indices = address +
        offsetof(struct mach_vm_reclaim_buffer_v1_s, indices);

    metadata = vmdr_metadata_alloc(task, buffer, buffer_size, indices);

    /*
     * Validate the starting indices
     */
    success = reclaim_copyin_busy(metadata, &busy);
    if (!success) {
        kr = KERN_INVALID_ARGUMENT;
        goto out;
    }
    success = reclaim_copyin_head(metadata, &head);
    if (!success) {
        kr = KERN_INVALID_ARGUMENT;
        goto out;
    }
    success = reclaim_copyin_tail(metadata, &tail);
    if (!success) {
        kr = KERN_INVALID_ARGUMENT;
        goto out;
    }
    if (head != 0 || tail != 0 || busy != 0) {
        kr = KERN_INVALID_ARGUMENT;
        goto out;
    }

    kr = vmdr_metadata_wire(metadata);
    if (kr != KERN_SUCCESS) {
        goto out;
    }

    /*
     * Publish the metadata to the task & global buffer list. This must be
     * done under the task lock to synchronize with task termination - i.e.
     * task_terminate_internal is guaranteed to see the published metadata and
     * tear it down.
     */
    lck_mtx_lock(&reclamation_buffers_lock);
    task_lock(task);

    if (!task_is_active(task)) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: failed to initialize buffer on dying task (pid %d)", task_pid(task));
        kr = KERN_TERMINATED;
        goto fail_wired;
    } else if (task->deferred_reclamation_metadata != NULL) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: tried to overwrite existing reclaim buffer for pid %d", task_pid(task));
        kr = KERN_INVALID_ARGUMENT;
        goto fail_wired;
    }

    TAILQ_INSERT_TAIL(&reclamation_buffers, metadata, vdrm_list);
    reclamation_buffers_length++;

    task->deferred_reclamation_metadata = metadata;

    task_unlock(task);
    lck_mtx_unlock(&reclamation_buffers_lock);

    return KERN_SUCCESS;

fail_wired:
    task_unlock(task);
    lck_mtx_unlock(&reclamation_buffers_lock);
    tmp_kr = vmdr_metadata_unwire(metadata);
    assert3u(tmp_kr, ==, KERN_SUCCESS);

out:
    vmdr_metadata_free(metadata);
    return kr;
}

void
vm_deferred_reclamation_buffer_uninstall(vm_deferred_reclamation_metadata_t metadata)
{
    assert(metadata != NULL);
    /*
     * First remove the buffer from the global list so no one else can get access to it.
     */
    lck_mtx_lock(&reclamation_buffers_lock);
    TAILQ_REMOVE(&reclamation_buffers, metadata, vdrm_list);
    reclamation_buffers_length--;
    lck_mtx_unlock(&reclamation_buffers_lock);

    /*
     * Now remove it from the async list (if present)
     */
    lck_mtx_lock(&async_reclamation_buffers_lock);
    if (metadata->vdrm_async_list.tqe_next != NULL || metadata->vdrm_async_list.tqe_prev != NULL) {
        TAILQ_REMOVE(&async_reclamation_buffers, metadata, vdrm_async_list);
        metadata->vdrm_async_list.tqe_next = NULL;
        metadata->vdrm_async_list.tqe_prev = NULL;
    }
    lck_mtx_unlock(&async_reclamation_buffers_lock);

    // A kernel thread may have grabbed the lock for this buffer before we had
    // a chance to remove it from the queues. Take the metadata lock to ensure
    // any such workers are finished operating on the buffer.
    lck_mtx_lock(&metadata->vdrm_lock);
    lck_mtx_unlock(&metadata->vdrm_lock);

    vmdr_metadata_unwire(metadata);
}

void
vm_deferred_reclamation_buffer_deallocate(vm_deferred_reclamation_metadata_t metadata)
{
    assert(metadata != NULL);
    vmdr_metadata_free(metadata);
}

static user_addr_t
get_head_ptr(user_addr_t indices)
{
    return indices + offsetof(mach_vm_reclaim_indices_v1_t, head);
}

static user_addr_t
get_tail_ptr(user_addr_t indices)
{
    return indices + offsetof(mach_vm_reclaim_indices_v1_t, tail);
}

static user_addr_t
get_busy_ptr(user_addr_t indices)
{
    return indices + offsetof(mach_vm_reclaim_indices_v1_t, busy);
}
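
/*
 * For reference, the getters above index into the userspace-shared
 * mach_vm_reclaim_indices_v1_t block. A sketch of its assumed layout follows
 * (the authoritative definition lives in <mach/vm_reclaim.h>; the field order
 * here is inferred from the offsetof() usage above):
 *
 *     typedef struct {
 *         _Atomic uint64_t head; // first entry not yet reclaimed
 *         _Atomic uint64_t tail; // one past the last entry userspace filled
 *         _Atomic uint64_t busy; // end of the range the kernel has claimed
 *     } mach_vm_reclaim_indices_v1_t;
 */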

static void
reclaim_kill_with_reason(
    vm_deferred_reclamation_metadata_t metadata,
    unsigned reason,
    mach_exception_data_type_t subcode)
{
    unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
    mach_exception_code_t code = 0;
    task_t task = metadata->vdrm_task;
    proc_t p = NULL;
    boolean_t fatal = TRUE;
    bool killing_self = false;
    pid_t pid;
    int err;

    if (panic_on_kill) {
        panic("vm_reclaim: About to kill %p due to %d with subcode %lld\n", task, reason, subcode);
    }

    EXC_GUARD_ENCODE_TYPE(code, guard_type);
    EXC_GUARD_ENCODE_FLAVOR(code, reason);
    EXC_GUARD_ENCODE_TARGET(code, 0);

    assert(metadata->vdrm_task != kernel_task);
    killing_self = task == current_task();
    if (!killing_self) {
        /*
         * Grab a reference on the task to make sure it doesn't go away
         * after we drop the metadata lock
         */
        task_reference(task);
    }
    /*
     * We need to issue a wakeup in case this kill is coming from the async path.
     * Once we drop the lock the caller can no longer do this wakeup, but
     * if there's someone blocked on this reclaim they hold a map reference
     * and thus need to be woken up so the map can be freed.
     */
    thread_wakeup(&metadata->vdrm_async_list);
    lck_mtx_unlock(&metadata->vdrm_lock);

    if (reason == kGUARD_EXC_DEALLOC_GAP) {
        task_lock(task);
        fatal = (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL);
        task_unlock(task);
    }

    if (!fatal) {
        os_log_info(vm_reclaim_log_handle,
            "vm_reclaim: Skipping non-fatal guard exception.\n");
        goto out;
    }

    pid = task_pid(task);
    if (killing_self) {
        p = get_bsdtask_info(task);
    } else {
        p = proc_find(pid);
        if (p && proc_task(p) != task) {
            os_log_error(vm_reclaim_log_handle,
                "vm_reclaim: Unable to deliver guard exception because proc is gone & pid rolled over.\n");
            goto out;
        }

        task_deallocate(task);
        task = NULL;
    }

    if (!p) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Unable to deliver guard exception because task does not have a proc.\n");
        goto out;
    }

    err = exit_with_guard_exception(p, code, subcode);
    if (err != 0) {
        os_log_error(vm_reclaim_log_handle, "vm_reclaim: Unable to deliver guard exception to %p: %d\n", p, err);
    }
out:
    if (!killing_self) {
        if (p) {
            proc_rele(p);
            p = NULL;
        }
        if (task) {
            task_deallocate(task);
            task = NULL;
        }
    }
}

static void
reclaim_handle_copyio_error(vm_deferred_reclamation_metadata_t metadata, int result)
{
    reclaim_kill_with_reason(metadata, kGUARD_EXC_RECLAIM_COPYIO_FAILURE, result);
}

/*
 * Helper functions to do copyio on the head, tail, and busy pointers.
 * Note that the kernel will only write to the busy and head pointers.
 * Userspace is not supposed to write to the head or busy pointers, but the kernel
 * must be resilient to that kind of bug in userspace.
 */
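
/*
 * Sketch of the claim/verify handshake that reclaim_chunk() below implements
 * on top of these helpers (illustrative comment only; the code in
 * reclaim_chunk() is authoritative):
 *
 *     busy = head + n;                   // claim n entries
 *     reclaim_copyout_busy(busy);        // publish the claim to userspace
 *     os_atomic_thread_fence(seq_cst);   // order the claim before re-reading
 *     reclaim_copyin_tail(&tail);        // did userspace shrink the window?
 *     if (tail >= busy)                  // claim stands; reclaim the entries
 *     else                               // retry with the smaller window, or
 *                                        // reset busy back to head if empty
 */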

static bool
reclaim_copyin_head(vm_deferred_reclamation_metadata_t metadata, uint64_t *head)
{
    int result;
    user_addr_t indices = metadata->vdrm_reclaim_indices;
    user_addr_t head_ptr = get_head_ptr(indices);

    result = copyin_atomic64(head_ptr, head);

    if (result != 0) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Unable to copy head ptr from 0x%llx: err=%d\n", head_ptr, result);
        reclaim_handle_copyio_error(metadata, result);
        return false;
    }
    return true;
}

static bool
reclaim_copyin_tail(vm_deferred_reclamation_metadata_t metadata, uint64_t *tail)
{
    int result;
    user_addr_t indices = metadata->vdrm_reclaim_indices;
    user_addr_t tail_ptr = get_tail_ptr(indices);

    result = copyin_atomic64(tail_ptr, tail);

    if (result != 0) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Unable to copy tail ptr from 0x%llx: err=%d\n", tail_ptr, result);
        reclaim_handle_copyio_error(metadata, result);
        return false;
    }
    return true;
}

static bool
reclaim_copyin_busy(vm_deferred_reclamation_metadata_t metadata, uint64_t *busy)
{
    int result;
    user_addr_t indices = metadata->vdrm_reclaim_indices;
    user_addr_t busy_ptr = get_busy_ptr(indices);

    result = copyin_atomic64(busy_ptr, busy);

    if (result != 0) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Unable to copy busy ptr from 0x%llx: err=%d\n", busy_ptr, result);
        reclaim_handle_copyio_error(metadata, result);
        return false;
    }
    return true;
}

static bool
reclaim_copyout_busy(vm_deferred_reclamation_metadata_t metadata, uint64_t value)
{
    int result;
    user_addr_t indices = metadata->vdrm_reclaim_indices;
    user_addr_t busy_ptr = get_busy_ptr(indices);

    result = copyout_atomic64(value, busy_ptr);

    if (result != 0) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Unable to copy %llu to busy ptr at 0x%llx: err=%d\n", value, busy_ptr, result);
        reclaim_handle_copyio_error(metadata, result);
        return false;
    }
    return true;
}

static bool
reclaim_copyout_head(vm_deferred_reclamation_metadata_t metadata, uint64_t value)
{
    int result;
    user_addr_t indices = metadata->vdrm_reclaim_indices;
    user_addr_t head_ptr = get_head_ptr(indices);

    result = copyout_atomic64(value, head_ptr);

    if (result != 0) {
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Unable to copy %llu to head ptr at 0x%llx: err=%d\n", value, head_ptr, result);
        reclaim_handle_copyio_error(metadata, result);
        return false;
    }
    return true;
}

/*
 * Reclaim a chunk from the buffer.
 * Returns the number of entries reclaimed or 0 if there are no entries left in the buffer.
 */
static size_t
reclaim_chunk(vm_deferred_reclamation_metadata_t metadata)
{
    assert(metadata != NULL);
    LCK_MTX_ASSERT(&metadata->vdrm_lock, LCK_MTX_ASSERT_OWNED);

    int result = 0;
    size_t num_reclaimed = 0;
    uint64_t head = 0, tail = 0, busy = 0, num_to_reclaim = 0, new_tail = 0, num_copied = 0, buffer_len = 0;
    user_addr_t indices;
    vm_map_t map = metadata->vdrm_map, old_map;
    mach_vm_reclaim_entry_v1_t reclaim_entries[kReclaimChunkSize];
    bool success;

    buffer_len = metadata->vdrm_buffer_size / sizeof(mach_vm_reclaim_entry_v1_t);

    memset(reclaim_entries, 0, sizeof(reclaim_entries));

    indices = (user_addr_t) metadata->vdrm_reclaim_indices;
    old_map = vm_map_switch(map);

    success = reclaim_copyin_busy(metadata, &busy);
    if (!success) {
        goto fail;
    }
    success = reclaim_copyin_head(metadata, &head);
    if (!success) {
        goto fail;
    }
    success = reclaim_copyin_tail(metadata, &tail);
    if (!success) {
        goto fail;
    }

    if (busy != head) {
        // Userspace overwrote one of the pointers
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Userspace modified head or busy pointer! head: %llu (0x%llx) != busy: %llu (0x%llx) | tail = %llu (0x%llx)\n",
            head, get_head_ptr(indices), busy, get_busy_ptr(indices), tail, get_tail_ptr(indices));
        reclaim_kill_with_reason(metadata, kGUARD_EXC_RECLAIM_INDEX_FAILURE, busy);
        goto fail;
    }

    if (tail < head) {
        // Userspace is likely in the middle of trying to re-use an entry, bail on this reclamation
        os_log_error(vm_reclaim_log_handle,
            "vm_reclaim: Userspace modified head or tail pointer! head: %llu (0x%llx) > tail: %llu (0x%llx) | busy = %llu (0x%llx)\n",
            head, get_head_ptr(indices), tail, get_tail_ptr(indices), busy, get_busy_ptr(indices));
        lck_mtx_unlock(&metadata->vdrm_lock);
        goto fail;
    }

    num_to_reclaim = tail - head;
    while (true) {
        num_to_reclaim = MIN(num_to_reclaim, kReclaimChunkSize);
        if (num_to_reclaim == 0) {
            break;
        }
        busy = head + num_to_reclaim;
        success = reclaim_copyout_busy(metadata, busy);
        if (!success) {
            goto fail;
        }
        os_atomic_thread_fence(seq_cst);
        success = reclaim_copyin_tail(metadata, &new_tail);
        if (!success) {
            goto fail;
        }

        if (new_tail >= busy) {
            /* Got num_to_reclaim entries */
            break;
        }
        tail = new_tail;
        if (tail < head) {
            // Userspace is likely in the middle of trying to re-use an entry, bail on this reclamation
            os_log_error(vm_reclaim_log_handle,
                "vm_reclaim: Userspace modified head or tail pointer! head: %llu (0x%llx) > tail: %llu (0x%llx) | busy = %llu (0x%llx)\n",
                head, get_head_ptr(indices), tail, get_tail_ptr(indices), busy, get_busy_ptr(indices));
            lck_mtx_unlock(&metadata->vdrm_lock);
            goto fail;
        }
        /* Can't reclaim these entries. Try again */
        num_to_reclaim = tail - head;
        if (num_to_reclaim == 0) {
            /* Nothing left to reclaim. Reset busy to head. */
            success = reclaim_copyout_busy(metadata, head);
            if (!success) {
                goto fail;
            }
            break;
        }
        /*
         * Note that num_to_reclaim must have gotten smaller since tail got smaller,
         * so this is guaranteed to converge.
         */
    }

    while (num_copied < num_to_reclaim) {
        uint64_t memcpy_start_idx = (head % buffer_len);
        uint64_t memcpy_end_idx = memcpy_start_idx + num_to_reclaim - num_copied;
        // Clamp the end idx to the buffer. We'll handle wrap-around in our next go around the loop.
        memcpy_end_idx = MIN(memcpy_end_idx, buffer_len);
        uint64_t num_to_copy = memcpy_end_idx - memcpy_start_idx;

        assert(num_to_copy + num_copied <= kReclaimChunkSize);
        user_addr_t src_ptr = metadata->vdrm_reclaim_buffer + memcpy_start_idx * sizeof(mach_vm_reclaim_entry_v1_t);
        mach_vm_reclaim_entry_v1_t *dst_ptr = reclaim_entries + num_copied;

        result = copyin(src_ptr, dst_ptr, num_to_copy * sizeof(mach_vm_reclaim_entry_v1_t));

        if (result != 0) {
            os_log_error(vm_reclaim_log_handle,
                "vm_reclaim: Unable to copyin %llu entries in reclaim buffer at 0x%llx to 0x%llx: err=%d\n",
                num_to_copy, src_ptr, (uint64_t) dst_ptr, result);
            reclaim_handle_copyio_error(metadata, result);
            goto fail;
        }

        num_copied += num_to_copy;
        head += num_to_copy;
    }
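
    /*
     * Wrap-around example (illustrative): with buffer_len = 8, head = 6, and
     * num_to_reclaim = 4, the first pass above copies ring indices 6..7
     * (2 entries), the second pass copies indices 0..1, and head advances
     * from 6 to 10.
     */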

    for (size_t i = 0; i < num_to_reclaim; i++) {
        mach_vm_reclaim_entry_v1_t *entry = &reclaim_entries[i];
        if (entry->address != 0 && entry->size != 0) {
            kern_return_t kr = vm_map_remove_guard(map,
                vm_map_trunc_page(entry->address,
                    VM_MAP_PAGE_MASK(map)),
                vm_map_round_page(entry->address + entry->size,
                    VM_MAP_PAGE_MASK(map)),
                VM_MAP_REMOVE_GAPS_FAIL,
                KMEM_GUARD_NONE).kmr_return;
            if (kr == KERN_INVALID_VALUE) {
                reclaim_kill_with_reason(metadata, kGUARD_EXC_DEALLOC_GAP, entry->address);
                goto fail;
            } else if (kr != KERN_SUCCESS) {
                os_log_error(vm_reclaim_log_handle,
                    "vm_reclaim: Unable to deallocate 0x%llx (%u) from 0x%llx. Err: %d\n",
                    entry->address, entry->size, (uint64_t) map, kr);
                reclaim_kill_with_reason(metadata, kGUARD_EXC_RECLAIM_DEALLOCATE_FAILURE, kr);
                goto fail;
            }
            num_reclaimed++;
            os_atomic_add(&metadata->vdrm_num_bytes_reclaimed, entry->size, relaxed);
        }
    }

    success = reclaim_copyout_head(metadata, head);
    if (!success) {
        goto fail;
    }

    vm_map_switch(old_map);
    return num_reclaimed;
fail:
    vm_map_switch(old_map);
    return kReclaimChunkFailed;
}

/*
 * Attempts to reclaim until the buffer's estimated number of available bytes
 * is <= num_bytes_reclaimable_threshold.
 * The metadata buffer lock should be held by the caller.
 *
 * Returns the number of entries reclaimed.
 */
static size_t
reclaim_entries_from_buffer(vm_deferred_reclamation_metadata_t metadata, size_t num_bytes_reclaimable_threshold)
{
    assert(metadata != NULL);
    LCK_MTX_ASSERT(&metadata->vdrm_lock, LCK_MTX_ASSERT_OWNED);
    if (!task_is_active(metadata->vdrm_task)) {
        /*
         * If the task is exiting, the reclaim below will likely fail and fall through
         * to the (slower) error path.
         * So as an optimization, we bail out early here.
         */
        return 0;
    }

    size_t num_entries_reclaimed = 0, num_bytes_reclaimed, estimated_reclaimable_bytes, reclaimable_bytes;
    while (true) {
        size_t curr_entries_reclaimed = 0;
        num_bytes_reclaimed = os_atomic_load(&metadata->vdrm_num_bytes_reclaimed, relaxed);
        reclaimable_bytes = os_atomic_load(&metadata->vdrm_num_bytes_put_in_buffer, relaxed);
        if (num_bytes_reclaimed > reclaimable_bytes) {
            estimated_reclaimable_bytes = 0;
        } else {
            estimated_reclaimable_bytes = reclaimable_bytes - num_bytes_reclaimed;
        }
        if (estimated_reclaimable_bytes <= num_bytes_reclaimable_threshold) {
            break;
        }
        curr_entries_reclaimed = reclaim_chunk(metadata);
        if (curr_entries_reclaimed == kReclaimChunkFailed) {
            return kReclaimChunkFailed;
        }
        if (curr_entries_reclaimed == 0) {
            break;
        }
        num_entries_reclaimed += curr_entries_reclaimed;
    }

    return num_entries_reclaimed;
}

/*
 * Get the reclamation metadata buffer for the given task.
 * If the buffer exists it is returned locked.
 */
static vm_deferred_reclamation_metadata_t
get_task_reclaim_metadata(task_t task)
{
    assert(task != NULL);
    vm_deferred_reclamation_metadata_t metadata = NULL;
    task_lock(task);
    metadata = task->deferred_reclamation_metadata;
    if (metadata != NULL) {
        lck_mtx_lock(&metadata->vdrm_lock);
    }
    task_unlock(task);
    return metadata;
}

kern_return_t
vm_deferred_reclamation_buffer_synchronize_internal(task_t task, size_t num_entries_to_reclaim)
{
    vm_deferred_reclamation_metadata_t metadata = NULL;
    size_t total_reclaimed = 0;

    if (!task_is_active(task)) {
        return KERN_FAILURE;
    }

    metadata = get_task_reclaim_metadata(task);
    if (metadata == NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    while (total_reclaimed < num_entries_to_reclaim) {
        size_t num_reclaimed = reclaim_chunk(metadata);
        if (num_reclaimed == kReclaimChunkFailed) {
            /* Lock has already been released and task is being killed. */
            return KERN_FAILURE;
        }
        if (num_reclaimed == 0) {
            /* There was nothing to reclaim. A reclamation thread must have beaten us to it. Nothing to do here. */
            break;
        }

        total_reclaimed += num_reclaimed;
    }
    lck_mtx_unlock(&metadata->vdrm_lock);

    return KERN_SUCCESS;
}

kern_return_t
vm_deferred_reclamation_buffer_update_reclaimable_bytes_internal(task_t task, size_t reclaimable_bytes)
{
    vm_deferred_reclamation_metadata_t metadata = task->deferred_reclamation_metadata;
    size_t num_bytes_reclaimed, estimated_reclaimable_bytes, num_bytes_in_buffer;
    bool success;
    if (metadata == NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * The client is allowed to make this call in parallel from multiple threads.
     * Ensure we only ever increase the value of vdrm_num_bytes_put_in_buffer.
     * If the client's value is smaller than what we've stored, another thread
     * raced ahead of them and we've already acted on that accounting so this
     * call should be a no-op.
     */
    success = os_atomic_rmw_loop(&metadata->vdrm_num_bytes_put_in_buffer, num_bytes_in_buffer,
        reclaimable_bytes, acquire,
        {
        if (num_bytes_in_buffer > reclaimable_bytes) {
            os_atomic_rmw_loop_give_up(break);
        }
    });
    if (!success) {
        /* Stale value. Nothing new to reclaim */
        return KERN_SUCCESS;
    }
    num_bytes_reclaimed = os_atomic_load(&metadata->vdrm_num_bytes_reclaimed, relaxed);

    if (reclaimable_bytes > num_bytes_reclaimed) {
        estimated_reclaimable_bytes = reclaimable_bytes - num_bytes_reclaimed;
        if (estimated_reclaimable_bytes > vm_reclaim_max_threshold) {
            lck_mtx_lock(&metadata->vdrm_lock);
            size_t num_reclaimed = reclaim_entries_from_buffer(metadata, vm_reclaim_max_threshold);
            if (num_reclaimed == kReclaimChunkFailed) {
                /* Lock has already been released & task is in the process of getting killed. */
                return KERN_INVALID_ARGUMENT;
            }
            lck_mtx_unlock(&metadata->vdrm_lock);
        }
    }

    return KERN_SUCCESS;
}

static inline size_t
pick_reclaim_threshold(vm_deferred_reclamation_action_t action)
{
    switch (action) {
    case RECLAIM_FULL:
        return 0;
    case RECLAIM_TRIM:
        return vm_reclaim_max_threshold / vm_reclaim_trim_divisor;
    case RECLAIM_ASYNC:
        return 0;
    }
}
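
/*
 * Example with the default tunables (vm_reclaim_trim_divisor = 2): a
 * RECLAIM_TRIM pass reclaims each buffer down to an estimated
 * vm_reclaim_max_threshold / 2 reclaimable bytes, while RECLAIM_FULL and
 * RECLAIM_ASYNC drain buffers completely (threshold 0).
 */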

void
vm_deferred_reclamation_reclaim_memory(vm_deferred_reclamation_action_t action)
{
    if (action == RECLAIM_ASYNC) {
        lck_mtx_lock(&async_reclamation_buffers_lock);

        process_async_reclamation_list();
        lck_mtx_unlock(&async_reclamation_buffers_lock);
    } else {
        size_t reclaim_threshold = pick_reclaim_threshold(action);
        lck_mtx_lock(&reclamation_buffers_lock);
        reclamation_counter++;
        while (true) {
            vm_deferred_reclamation_metadata_t metadata = TAILQ_FIRST(&reclamation_buffers);
            if (metadata == NULL) {
                break;
            }
            lck_mtx_lock(&metadata->vdrm_lock);
            if (metadata->vdrm_reclaimed_at >= reclamation_counter) {
                // We've already seen this one. We're done
                lck_mtx_unlock(&metadata->vdrm_lock);
                break;
            }
            metadata->vdrm_reclaimed_at = reclamation_counter;

            TAILQ_REMOVE(&reclamation_buffers, metadata, vdrm_list);
            TAILQ_INSERT_TAIL(&reclamation_buffers, metadata, vdrm_list);
            lck_mtx_unlock(&reclamation_buffers_lock);

            size_t num_reclaimed = reclaim_entries_from_buffer(metadata, reclaim_threshold);
            if (num_reclaimed != kReclaimChunkFailed) {
                lck_mtx_unlock(&metadata->vdrm_lock);
            }

            lck_mtx_lock(&reclamation_buffers_lock);
        }
        lck_mtx_unlock(&reclamation_buffers_lock);
    }
}

void
vm_deferred_reclamation_reclaim_all_memory(void)
{
    vm_deferred_reclamation_reclaim_memory(RECLAIM_FULL);
}

bool
vm_deferred_reclamation_reclaim_from_task_async(task_t task)
{
    bool queued = false;
    vm_deferred_reclamation_metadata_t metadata = task->deferred_reclamation_metadata;

    if (metadata != NULL) {
        lck_mtx_lock(&async_reclamation_buffers_lock);
        if (metadata->vdrm_async_list.tqe_next != NULL ||
            metadata->vdrm_async_list.tqe_prev != NULL) {
            // move this buffer to the tail if still on the async list
            TAILQ_REMOVE(&async_reclamation_buffers, metadata, vdrm_async_list);
        }
        TAILQ_INSERT_TAIL(&async_reclamation_buffers, metadata, vdrm_async_list);
        lck_mtx_unlock(&async_reclamation_buffers_lock);
        queued = true;
        thread_wakeup(&vm_reclaim_thread);
    }

    return queued;
}

bool
vm_deferred_reclamation_reclaim_from_task_sync(task_t task, size_t max_entries_to_reclaim)
{
    size_t num_reclaimed = 0;
    vm_deferred_reclamation_metadata_t metadata = task->deferred_reclamation_metadata;

    if (!task_is_active(task)) {
        return false;
    }

    if (metadata != NULL) {
        lck_mtx_lock(&metadata->vdrm_lock);
        while (num_reclaimed < max_entries_to_reclaim) {
            size_t num_reclaimed_now = reclaim_chunk(metadata);
            if (num_reclaimed_now == kReclaimChunkFailed) {
                /* Lock has already been released and task is being killed. */
                return false;
            }
            if (num_reclaimed_now == 0) {
                // Nothing left to reclaim
                break;
            }
            num_reclaimed += num_reclaimed_now;
        }
        lck_mtx_unlock(&metadata->vdrm_lock);
    }

    return num_reclaimed > 0;
}

vm_deferred_reclamation_metadata_t
vm_deferred_reclamation_buffer_fork(task_t task, vm_deferred_reclamation_metadata_t parent)
{
    kern_return_t kr;
    vm_deferred_reclamation_metadata_t metadata = NULL;

    LCK_MTX_ASSERT(&parent->vdrm_lock, LCK_MTX_ASSERT_OWNED);

    assert(task->deferred_reclamation_metadata == NULL);
    metadata = vmdr_metadata_alloc(task, parent->vdrm_reclaim_buffer,
        parent->vdrm_buffer_size, parent->vdrm_reclaim_indices);
    lck_mtx_unlock(&parent->vdrm_lock);

    kr = vmdr_metadata_wire(metadata);
    if (kr != KERN_SUCCESS) {
        vmdr_metadata_free(metadata);
        return NULL;
    }

    lck_mtx_lock(&reclamation_buffers_lock);
    TAILQ_INSERT_TAIL(&reclamation_buffers, metadata, vdrm_list);
    reclamation_buffers_length++;
    lck_mtx_unlock(&reclamation_buffers_lock);

    return metadata;
}

void
vm_deferred_reclamation_buffer_lock(vm_deferred_reclamation_metadata_t metadata)
{
    lck_mtx_lock(&metadata->vdrm_lock);
}

void
vm_deferred_reclamation_buffer_unlock(vm_deferred_reclamation_metadata_t metadata)
{
    lck_mtx_unlock(&metadata->vdrm_lock);
}

static void
reclaim_thread_init(void)
{
#if CONFIG_THREAD_GROUPS
    thread_group_vm_add();
#endif
    thread_set_thread_name(current_thread(), "VM_reclaim");
}

static void
process_async_reclamation_list(void)
{
    LCK_MTX_ASSERT(&async_reclamation_buffers_lock, LCK_MTX_ASSERT_OWNED);

    vm_deferred_reclamation_metadata_t metadata = TAILQ_FIRST(&async_reclamation_buffers);
    while (metadata != NULL) {
        TAILQ_REMOVE(&async_reclamation_buffers, metadata, vdrm_async_list);
        metadata->vdrm_async_list.tqe_next = NULL;
        metadata->vdrm_async_list.tqe_prev = NULL;
        lck_mtx_lock(&metadata->vdrm_lock);
        lck_mtx_unlock(&async_reclamation_buffers_lock);

        // NB: Currently the async reclaim thread fully reclaims the buffer.
        size_t num_reclaimed = reclaim_entries_from_buffer(metadata, 0);
        if (num_reclaimed == kReclaimChunkFailed) {
            /* Lock has already been released & task is in the process of getting killed. */
            goto next;
        }
        /* Wakeup anyone waiting on this buffer getting processed */
        thread_wakeup(&metadata->vdrm_async_list);
        assert(current_thread()->map == kernel_map);
        lck_mtx_unlock(&metadata->vdrm_lock);

next:
        lck_mtx_lock(&async_reclamation_buffers_lock);
        metadata = TAILQ_FIRST(&async_reclamation_buffers);
    }
}

__enum_decl(reclaim_thread_state, uint32_t, {
    RECLAIM_THREAD_INIT = 0,
    RECLAIM_THREAD_CONT = 1,
});

static void
reclaim_thread_continue(void)
{
    lck_mtx_lock(&async_reclamation_buffers_lock);

    process_async_reclamation_list();
    assert_wait(&vm_reclaim_thread, THREAD_UNINT);

    lck_mtx_unlock(&async_reclamation_buffers_lock);
}

void
reclaim_thread(void *param, wait_result_t wr __unused)
{
    if (param == (void *) RECLAIM_THREAD_INIT) {
        reclaim_thread_init();
    } else {
        assert(param == (void *) RECLAIM_THREAD_CONT);
    }

    reclaim_thread_continue();

    (void) thread_block_parameter(reclaim_thread, (void *) RECLAIM_THREAD_CONT);
}

__startup_func
static void
vm_deferred_reclamation_init(void)
{
    // Note: no-op pending rdar://27006343 (Custom kernel log handles)
    vm_reclaim_log_handle = os_log_create("com.apple.mach.vm", "reclaim");

    (void)kernel_thread_start_priority(reclaim_thread,
        (void *)RECLAIM_THREAD_INIT, kReclaimThreadPriority,
        &vm_reclaim_thread);
}

STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, vm_deferred_reclamation_init);

#if DEVELOPMENT || DEBUG

bool
vm_deferred_reclamation_block_until_pid_has_been_reclaimed(int pid)
{
    vm_deferred_reclamation_metadata_t metadata = NULL;
    proc_t p = proc_find(pid);
    vm_map_t map = NULL;
    if (p == NULL) {
        return false;
    }
    task_t t = proc_task(p);
    if (t == NULL) {
        proc_rele(p);
        return false;
    }

    task_lock(t);
    if (t->map) {
        metadata = t->deferred_reclamation_metadata;
        if (metadata != NULL) {
            map = t->map;
            vm_map_reference(t->map);
        }
    }
    task_unlock(t);
    proc_rele(p);
    if (metadata == NULL) {
        return false;
    }

    lck_mtx_lock(&async_reclamation_buffers_lock);
    while (metadata->vdrm_async_list.tqe_next != NULL || metadata->vdrm_async_list.tqe_prev != NULL) {
        assert_wait(&metadata->vdrm_async_list, THREAD_UNINT);
        lck_mtx_unlock(&async_reclamation_buffers_lock);
        thread_block(THREAD_CONTINUE_NULL);
        lck_mtx_lock(&async_reclamation_buffers_lock);
    }

    /*
     * The async reclaim thread first removes the buffer from the list
     * and then reclaims it (while holding its lock).
     * So grab the metadata buffer's lock here to ensure the
     * reclaim is done.
     */
    lck_mtx_lock(&metadata->vdrm_lock);
    lck_mtx_unlock(&metadata->vdrm_lock);
    lck_mtx_unlock(&async_reclamation_buffers_lock);

    vm_map_deallocate(map);
    return true;
}

#endif /* DEVELOPMENT || DEBUG */