1 /*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #if defined(__LP64__)
30 /*
31 * Userspace functions for manipulating the reclaim buffer.
32 */
33 #include <inttypes.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #include <mach/vm_reclaim.h>
37 #include <mach/mach.h>
38 #include <mach/mach_vm.h>
39 #undef _mach_vm_user_
40 #include <mach/mach_vm_internal.h>
41 #include <mach/vm_map.h>
42 #include <mach/vm_statistics.h>
43 #include <os/atomic_private.h>
44 #include <mach/vm_page_size.h>
45
46
47 #pragma mark Utilities
/*
 * Trap-on-failure assertion.
 *
 * __condition is evaluated exactly once; when it is false the process is
 * terminated via __builtin_trap(). __op and __cause are accepted for the
 * benefit of the call sites but are never expanded, so they are not
 * evaluated.
 */
#define _assert(__op, __condition, __cause) \
	do { \
	        if (__condition) { \
	                break; /* invariant holds: leave the do/while(0) */ \
	        } \
	        __builtin_trap(); \
	} while (0)
54
55 static uint64_t kAccountingThreshold;
56
57 static bool
update_accounting(mach_vm_reclaim_ringbuffer_v1_t ring_buffer,int64_t size)58 update_accounting(mach_vm_reclaim_ringbuffer_v1_t ring_buffer, int64_t size)
59 {
60 ring_buffer->va_in_buffer += size;
61 if ((ring_buffer->va_in_buffer > ring_buffer->last_accounting_given_to_kernel &&
62 ring_buffer->va_in_buffer - ring_buffer->last_accounting_given_to_kernel > kAccountingThreshold) ||
63 (ring_buffer->last_accounting_given_to_kernel > ring_buffer->va_in_buffer &&
64 ring_buffer->last_accounting_given_to_kernel - ring_buffer->va_in_buffer > kAccountingThreshold)) {
65 /*
66 * The caller should call mach_vm_reclaim_update_kernel_accounting.
67 * We store the value that they will give to the kernel here while we hold the lock.
68 * Technically it's out of sync with what the kernel has seen, but
69 * that will be rectified once the caller makes the mach_vm_reclaim_update_kernel_accounting call.
70 * If we forced this value to be in sync with the kernel's value
71 * all callers would start calling mach_vm_reclaim_update_kernel_accounting until one of them
72 * finishes & we'd have to take the ringbuffer lock again in
73 * mach_vm_reclaim_update_kernel_accounting.
74 */
75 ring_buffer->last_accounting_given_to_kernel = ring_buffer->va_in_buffer;
76 return true;
77 }
78 return false;
79 }
80
81 static inline
82 mach_vm_reclaim_entry_v1_t
construct_entry(mach_vm_address_t start_addr,uint32_t size)83 construct_entry(mach_vm_address_t start_addr, uint32_t size)
84 {
85 mach_vm_reclaim_entry_v1_t entry = {0ULL};
86 entry.address = start_addr;
87 entry.size = size;
88 return entry;
89 }
90
91 kern_return_t
mach_vm_reclaim_ringbuffer_init(mach_vm_reclaim_ringbuffer_v1_t ring_buffer)92 mach_vm_reclaim_ringbuffer_init(mach_vm_reclaim_ringbuffer_v1_t ring_buffer)
93 {
94 kAccountingThreshold = vm_page_size;
95 kern_return_t kr;
96 mach_vm_size_t buffer_size = vm_page_size;
97 bzero(ring_buffer, sizeof(struct mach_vm_reclaim_ringbuffer_v1_s));
98 size_t entries_size = buffer_size - \
99 offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
100 ring_buffer->buffer_len = entries_size / sizeof(mach_vm_reclaim_entry_v1_t);
101
102 int flags = VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(VM_MEMORY_MALLOC);
103 kr = mach_vm_map(mach_task_self(), (mach_vm_address_t *)&ring_buffer->buffer,
104 buffer_size, 0, flags, MEMORY_OBJECT_NULL, 0, FALSE,
105 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
106 if (kr != KERN_SUCCESS) {
107 return kr;
108 }
109
110 kr = mach_vm_deferred_reclamation_buffer_init(mach_task_self(),
111 (mach_vm_address_t)ring_buffer->buffer, buffer_size);
112
113 if (kr != KERN_SUCCESS) {
114 mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ring_buffer->buffer,
115 buffer_size);
116 return kr;
117 }
118
119 return KERN_SUCCESS;
120 }
121
122 uint64_t
mach_vm_reclaim_mark_free(mach_vm_reclaim_ringbuffer_v1_t ring_buffer,mach_vm_address_t start_addr,uint32_t size,bool * should_update_kernel_accounting)123 mach_vm_reclaim_mark_free(
124 mach_vm_reclaim_ringbuffer_v1_t ring_buffer, mach_vm_address_t start_addr, uint32_t size,
125 bool *should_update_kernel_accounting)
126 {
127 uint64_t idx = 0, head = 0;
128 mach_vm_reclaim_entry_v1_t entry = construct_entry(start_addr, size);
129 mach_vm_reclaim_indices_v1_t *indices = &ring_buffer->buffer->indices;
130 mach_vm_reclaim_entry_v1_t *buffer = ring_buffer->buffer->entries;
131 mach_vm_size_t buffer_len = ring_buffer->buffer_len;
132 *should_update_kernel_accounting = false;
133
134 idx = os_atomic_load_wide(&indices->tail, relaxed);
135 head = os_atomic_load_wide(&indices->head, relaxed);
136
137 // This leaves one entry empty at the end of the buffer to differentiate an empty buffer from a full one
138 while ((idx + 1) % buffer_len == head % buffer_len) {
139 /*
140 * Buffer is full. Ask the kernel to reap it.
141 */
142 mach_vm_deferred_reclamation_buffer_synchronize(mach_task_self(), buffer_len - 1);
143 head = os_atomic_load_wide(&indices->head, relaxed);
144 /* kernel had to march head forward at least kNumEntriesToReclaim. We hold the buffer lock so tail couldn't have changed */
145 _assert("mach_vm_reclaim_mark_free", os_atomic_load_wide(&indices->tail, relaxed) % size != head % buffer_len, head);
146 }
147
148 /*
149 * idx must be >= head & the buffer is not full so it's not possible for the kernel to be acting on the entry at (tail + 1) % size.
150 * Thus we don't need to check the busy pointer here.
151 */
152 buffer[idx % buffer_len] = entry;
153 os_atomic_thread_fence(seq_cst); // tail increment can not be seen before the entry is cleared in the buffer
154 os_atomic_inc(&indices->tail, relaxed);
155 *should_update_kernel_accounting = update_accounting(ring_buffer, size);
156
157 return idx;
158 }
159
160 bool
mach_vm_reclaim_mark_used(mach_vm_reclaim_ringbuffer_v1_t ring_buffer,uint64_t id,mach_vm_address_t start_addr,uint32_t size)161 mach_vm_reclaim_mark_used(
162 mach_vm_reclaim_ringbuffer_v1_t ring_buffer, uint64_t id, mach_vm_address_t start_addr, uint32_t size)
163 {
164 mach_vm_reclaim_indices_v1_t *indices = &ring_buffer->buffer->indices;
165 mach_vm_reclaim_entry_v1_t *buffer = ring_buffer->buffer->entries;
166 mach_vm_size_t buffer_len = ring_buffer->buffer_len;
167 uint64_t head = 0, busy = 0, original_tail = 0;
168 if (id == VM_RECLAIM_INDEX_NULL) {
169 // entry was never put in the reclaim ring buffer, so it's safe to re-use.
170 return true;
171 }
172
173 head = os_atomic_load_wide(&indices->head, relaxed);
174 if (id < head) {
175 /*
176 * This is just a fast path for the case where the buffer has wrapped.
177 * It's not strictly necessary beacuse idx must also be < busy.
178 * That's why we can use a relaxed load for the head ptr.
179 */
180 return false;
181 }
182
183 /* Attempt to move tail to idx */
184 original_tail = os_atomic_load_wide(&indices->tail, relaxed);
185 _assert("mach_vm_reclaim_mark_used", id < original_tail, original_tail);
186
187 os_atomic_store_wide(&indices->tail, id, relaxed);
188 os_atomic_thread_fence(seq_cst); // Our write to tail must happen before our read of busy
189 busy = os_atomic_load_wide(&indices->busy, relaxed);
190 if (id < busy) {
191 /* Kernel is acting on this entry. Undo. */
192 os_atomic_store_wide(&indices->tail, original_tail, relaxed);
193 return false;
194 }
195 mach_vm_reclaim_entry_v1_t *entry = &buffer[id % buffer_len];
196 _assert("mach_vm_reclaim_mark_used", entry->size == size && entry->address == start_addr, entry->address);
197
198 /* Sucessfully moved tail back. Can now overwrite the entry */
199 memset(entry, 0, sizeof(mach_vm_reclaim_entry_v1_t));
200 os_atomic_thread_fence(seq_cst); // tail increment can not be seen before the entry is cleared in the buffer
201 /* Reset tail. */
202 os_atomic_store_wide(&indices->tail, original_tail, relaxed);
203
204 update_accounting(ring_buffer, -(int64_t) size);
205
206 return true;
207 }
208
209 kern_return_t
mach_vm_reclaim_update_kernel_accounting(const mach_vm_reclaim_ringbuffer_v1_t ring_buffer)210 mach_vm_reclaim_update_kernel_accounting(const mach_vm_reclaim_ringbuffer_v1_t ring_buffer)
211 {
212 return mach_vm_deferred_reclamation_buffer_update_reclaimable_bytes(current_task(),
213 ring_buffer->va_in_buffer);
214 }
215
216 bool
mach_vm_reclaim_is_available(const mach_vm_reclaim_ringbuffer_v1_t ring_buffer,uint64_t id)217 mach_vm_reclaim_is_available(const mach_vm_reclaim_ringbuffer_v1_t ring_buffer, uint64_t id)
218 {
219 const mach_vm_reclaim_indices_v1_t *indices = &ring_buffer->buffer->indices;
220 if (id == VM_RECLAIM_INDEX_NULL) {
221 // entry was never put in the reclaim ring buffer, so it's safe to re-use.
222 return true;
223 }
224
225 /*
226 * If the kernel has marched its busy pointer past this entry, consider it reclaimed.
227 * It's possible that the kernel will not reclaim this entry yet b/c we're racing with it on
228 * another thread via mach_vm_reclaim_mark_used.
229 */
230 uint64_t busy = os_atomic_load_wide(&indices->busy, relaxed);
231
232 return id >= busy;
233 }
234
235 kern_return_t
mach_vm_reclaim_synchronize(mach_vm_reclaim_ringbuffer_v1_t ringbuffer,mach_vm_size_t num_entries_to_reclaim)236 mach_vm_reclaim_synchronize(mach_vm_reclaim_ringbuffer_v1_t ringbuffer, mach_vm_size_t num_entries_to_reclaim)
237 {
238 if (ringbuffer == NULL) {
239 return KERN_INVALID_ARGUMENT;
240 }
241
242 return mach_vm_deferred_reclamation_buffer_synchronize(mach_task_self(), num_entries_to_reclaim);
243 }
244
245 #endif /* defined(__LP64__) */
246