1 /*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 #define LOCK_PRIVATE 1
57 #include <debug.h>
58 #include <kern/locks_internal.h>
59 #include <kern/lock_stat.h>
60 #include <kern/locks.h>
61 #include <kern/zalloc.h>
62 #include <kern/thread.h>
63 #include <kern/processor.h>
64 #include <kern/sched_prim.h>
65 #include <kern/debug.h>
66 #include <machine/atomic.h>
67 #include <machine/machine_cpu.h>
68
69 KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);
70
71 #define LCK_RW_WRITER_EVENT(lck) (event_t)((uintptr_t)(lck)+1)
72 #define LCK_RW_READER_EVENT(lck) (event_t)((uintptr_t)(lck)+2)
73 #define WRITE_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-1))
74 #define READ_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-2))
75
76 #if CONFIG_DTRACE
77 #define DTRACE_RW_SHARED 0x0 //reader
78 #define DTRACE_RW_EXCL 0x1 //writer
79 #define DTRACE_NO_FLAG 0x0 //not applicable
80 #endif /* CONFIG_DTRACE */
81
82 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
83 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
84 #define LCK_RW_LCK_SHARED_CODE 0x102
85 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
86 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
87 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
88
89 #if __x86_64__
90 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
91 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
92 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
93 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
94 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
95 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
96 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
97 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
98 #endif
99
100 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
101 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
102
103 #define ordered_load_rw(lock) os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
104 #define ordered_store_rw(lock, value) os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
105 #define ordered_store_rw_owner(lock, value) os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)
106
107 #ifdef DEBUG_RW
108 static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
109 SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
110 VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
111 #define rw_lock_debug_disabled() ((LcksOpts & disLkRWDebug) == disLkRWDebug)
112
113 #define set_rwlde_caller_packed(entry, caller) ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)caller, LCK_RW_CALLER_PACKED))
114 #define get_rwlde_caller(entry) ((void*)VM_UNPACK_POINTER(entry->rwlde_caller_packed, LCK_RW_CALLER_PACKED))
115
116 #endif /* DEBUG_RW */
117
118 /*!
119 * @function lck_rw_alloc_init
120 *
121 * @abstract
122 * Allocates and initializes a rw_lock_t.
123 *
124 * @discussion
125 * The function can block. See lck_rw_init() for initialization details.
126 *
127 * @param grp lock group to associate with the lock.
128 * @param attr lock attribute to initialize the lock.
129 *
130 * @returns NULL or the allocated lock
131 */
132 lck_rw_t *
lck_rw_alloc_init(lck_grp_t * grp,lck_attr_t * attr)133 lck_rw_alloc_init(
134 lck_grp_t *grp,
135 lck_attr_t *attr)
136 {
137 lck_rw_t *lck;
138
139 lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
140 lck_rw_init(lck, grp, attr);
141 return lck;
142 }
143
144 /*!
145 * @function lck_rw_init
146 *
147 * @abstract
148 * Initializes a rw_lock_t.
149 *
150 * @discussion
151 * Usage statistics for the lock are going to be added to the lock group provided.
152 *
153 * The lock attribute can be used to specify the lock contention behaviour.
154 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to RW_WRITER_PRIORITY)
155 * and lck_attr_rw_shared_priority() can be used to set the behaviour to RW_SHARED_PRIORITY.
156 *
157 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the readers;
158 * if the lock is held and a writer starts waiting for the lock, readers will not be able
159 * to acquire the lock until all writers stop contending. Readers could
160 * potentially starve.
161 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the writers:
 * unless the lock is held in exclusive mode, readers will always be able to acquire the lock.
163 * Readers can lock a shared lock even if there are writers waiting. Writers could potentially
164 * starve.
165 *
166 * @param lck lock to initialize.
167 * @param grp lock group to associate with the lock.
168 * @param attr lock attribute to initialize the lock.
169 *
170 */
void
lck_rw_init(
	lck_rw_t        *lck,
	lck_grp_t       *grp,
	lck_attr_t      *attr)
{
	/* keep this so that the lck_type_t type is referenced for lldb */
	lck_type_t type = LCK_TYPE_RW;

	/* LCK_ATTR_NULL falls back to the system-wide default attributes */
	if (attr == LCK_ATTR_NULL) {
		attr = &lck_attr_default;
	}
	/*
	 * priv_excl == 1 means writer priority (the default);
	 * LCK_ATTR_RW_SHARED_PRIORITY clears it to give readers priority.
	 */
	*lck = (lck_rw_t){
		.lck_rw_type = type,
		.lck_rw_can_sleep = true,
		.lck_rw_priv_excl = !(attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY),
	};
	/* account this lock against the group's rw-lock counter */
	lck_grp_reference(grp, &grp->lck_grp_rwcnt);
}
190
191 /*!
192 * @function lck_rw_free
193 *
194 * @abstract
195 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
196 *
197 * @discussion
198 * The lock must be not held by any thread.
199 *
200 * @param lck rw_lock to free.
201 */
void
lck_rw_free(
	lck_rw_t        *lck,
	lck_grp_t       *grp)
{
	/* destroy first (validates state and drops the group reference) ... */
	lck_rw_destroy(lck, grp);
	/* ... then return the memory to the lck_rw_t kalloc type zone */
	zfree(KT_LCK_RW, lck);
}
210
211 /*!
212 * @function lck_rw_destroy
213 *
214 * @abstract
215 * Destroys a rw_lock previously initialized with lck_rw_init().
216 *
217 * @discussion
218 * The lock must be not held by any thread.
219 *
220 * @param lck rw_lock to destroy.
221 */
void
lck_rw_destroy(
	lck_rw_t        *lck,
	lck_grp_t       *grp)
{
	/* catch destroy of a non-rw lock and double-destroy */
	if (lck->lck_rw_type != LCK_TYPE_RW ||
	    lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
		panic("Destroying previously destroyed lock %p", lck);
	}
	/* the lock must not be held in any mode at this point */
	lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);

	/* poison type/tag so any later use or destroy panics deterministically */
	lck->lck_rw_type = LCK_TYPE_NONE;
	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_deallocate(grp, &grp->lck_grp_rwcnt);
}
237
238 #ifdef DEBUG_RW
239
240 /*
241 * Best effort mechanism to debug rw_locks.
242 *
243 * This mechanism is in addition to the owner checks. The owner is set
244 * only when the lock is held in exclusive mode so the checks do not cover
245 * the cases in which the lock is held in shared mode.
246 *
247 * This mechanism tentatively stores the rw_lock acquired and its debug
248 * information on the thread struct.
249 * Just up to LCK_RW_EXPECTED_MAX_NUMBER rw lock debug information can be stored.
250 *
251 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
252 * at the same time. If a thread holds more than this number of rw_locks we
253 * will start losing debug information.
254 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability we will
255 * store the debug information but it will require more memory per thread
256 * and longer lock/unlock time.
257 *
258 * If an empty slot is found for the debug information, we record the lock
259 * otherwise we set the overflow threshold flag.
260 *
261 * If we reached the overflow threshold we might stop asserting because we cannot be sure
262 * anymore if the lock was acquired or not.
263 *
264 * Even if we reached the overflow threshold, we try to store the debug information
265 * for the new locks acquired. This can be useful in core dumps to debug
266 * possible return to userspace without unlocking and to find possible readers
267 * holding the lock.
268 */
/*
 * Startup hook: honor a kernel feature override that disables
 * the best-effort rw-lock debug tracking described above.
 */
__startup_func
static void
rw_lock_init(void)
{
	if (kern_feature_override(KF_RW_LOCK_DEBUG_OVRD)) {
		LcksOpts |= disLkRWDebug;
	}
}
STARTUP(LOCKS, STARTUP_RANK_FIRST, rw_lock_init);
278
279 static inline struct rw_lock_debug_entry *
find_lock_in_savedlocks(lck_rw_t * lock,rw_lock_debug_t * rw_locks_held)280 find_lock_in_savedlocks(lck_rw_t* lock, rw_lock_debug_t *rw_locks_held)
281 {
282 int i;
283 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
284 struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
285 if (existing->rwlde_lock == lock) {
286 return existing;
287 }
288 }
289
290 return NULL;
291 }
292
/* Panic path for find_empty_slot(): no free debug slot was available. */
__abortlike
static void
rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
{
	panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
}
299
300 static inline struct rw_lock_debug_entry *
find_empty_slot(rw_lock_debug_t * rw_locks_held)301 find_empty_slot(rw_lock_debug_t *rw_locks_held)
302 {
303 int i;
304 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
305 struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
306 if (entry->rwlde_lock == NULL) {
307 return entry;
308 }
309 }
310 rwlock_slot_panic(rw_locks_held);
311 }
312
/* Panic path for assert_canlock_rwlock(): the thread already holds `lock`. */
__abortlike
static void
canlock_rwlock_panic(lck_rw_t* lock, thread_t thread, struct rw_lock_debug_entry *entry)
{
	panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
	    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
	    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
}
321
/*
 * Best-effort check that `thread` may acquire `lock` in mode `type`
 * without self-deadlocking. Panics if the debug records show the lock
 * is already held by this thread in an incompatible way.
 */
static inline void
assert_canlock_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled() || (rw_locks_held->rwld_locks_acquired == 0))) {
		//no locks saved, safe to lock
		return;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__improbable(entry != NULL)) {
		boolean_t can_be_shared_recursive;
		if (lck_rw_recursive_shared_assert_74048094) {
			/* strict mode: recursive shared acquire is only tolerated
			 * on reader-priority locks (priv_excl == 0) */
			can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
		} else {
			/* currently rw_lock_shared is called recursively,
			 * until the code is fixed allow to lock
			 * recursively in shared mode
			 */
			can_be_shared_recursive = TRUE;
		}
		/* mode_count >= 1 means held in shared mode: recursing is OK */
		if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
			return;
		}
		canlock_rwlock_panic(lock, thread, entry);
	}
}
350
/* Panic path: debug records show `thread` does not hold `lock` at all. */
__abortlike
static void
held_rwlock_notheld_panic(lck_rw_t* lock, thread_t thread)
{
	panic("RW lock %p not held by %p", lock, thread);
}
357
/*
 * Panic path: `lock` is recorded for `thread` but not in the expected mode.
 * The message is specialized on whether exclusive or shared was expected.
 */
__abortlike
static void
held_rwlock_notheld_with_info_panic(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
{
	if (type == LCK_RW_TYPE_EXCLUSIVE) {
		panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
		    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
		    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
	} else {
		panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
		    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
		    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
	}
}
372
/*
 * Best-effort assert that `thread` holds `lock` in mode `type`.
 * rwlde_mode_count convention: -1 == held exclusive, >0 == shared
 * recursion depth. When the per-thread array has overflowed
 * (rwld_overflow set) a missing record is inconclusive, so no panic.
 */
static inline void
assert_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
		/* nothing recorded: only conclusive if we never overflowed */
		if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
			held_rwlock_notheld_panic(lock, thread);
		}
		return;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__probable(entry != NULL)) {
		if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
			held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
		} else {
			if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
				held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
			}
		}
	} else {
		/* no record: conclusive only if the array never overflowed */
		if (rw_locks_held->rwld_overflow == 0) {
			held_rwlock_notheld_panic(lock, thread);
		}
	}
}
404
/*
 * Update the debug record for `lock` when the hold mode changes:
 * typeFrom == SHARED means an upgrade (shared -> exclusive),
 * otherwise a downgrade (exclusive -> shared). `caller` is recorded
 * (packed) for post-mortem debugging.
 */
static inline void
change_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
		/* no records: only a bug if the array never overflowed */
		if (rw_locks_held->rwld_overflow == 0) {
			held_rwlock_notheld_panic(lock, thread);
		}
		return;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__probable(entry != NULL)) {
		if (typeFrom == LCK_RW_TYPE_SHARED) {
			//We are upgrading
			assertf(entry->rwlde_mode_count == 1,
			    "RW lock %p not held by a single shared when upgrading "
			    "by %p caller %p read %d state 0x%x owner 0x%p ",
			    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
			    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
			entry->rwlde_mode_count = -1;
			set_rwlde_caller_packed(entry, caller);
		} else {
			//We are downgrading
			assertf(entry->rwlde_mode_count == -1,
			    "RW lock %p not held in write mode when downgrading "
			    "by %p caller %p read %d state 0x%x owner 0x%p ",
			    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
			    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
			entry->rwlde_mode_count = 1;
			set_rwlde_caller_packed(entry, caller);
		}
		return;
	}

	/*
	 * Lock not found. If we never overflowed this is a bug (panic does
	 * not return); otherwise the record was lost to overflow, so try to
	 * re-record the lock in its new mode below.
	 */
	if (rw_locks_held->rwld_overflow == 0) {
		held_rwlock_notheld_panic(lock, thread);
	}

	if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
		//array is full
		return;
	}

	struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
	null_entry->rwlde_lock = lock;
	set_rwlde_caller_packed(null_entry, caller);
	/* record the mode we changed TO: -1 exclusive, 1 shared */
	if (typeFrom == LCK_RW_TYPE_SHARED) {
		null_entry->rwlde_mode_count = -1;
	} else {
		null_entry->rwlde_mode_count = 1;
	}
	rw_locks_held->rwld_locks_saved++;
}
464
/* Panic path: per-thread acquired-locks counter would overflow UINT32_MAX. */
__abortlike
static void
add_held_rwlock_too_many_panic(thread_t thread)
{
	panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
}
471
/*
 * Record that `thread` acquired `lock` in mode `type` from `caller`.
 * Always bumps rwld_locks_acquired; additionally stores a debug entry
 * if a slot is free (setting rwld_overflow when the array is full).
 * A shared re-acquire of an already-recorded lock just bumps its
 * recursion count.
 */
static inline void
add_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
	struct rw_lock_debug_entry *null_entry;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
		add_held_rwlock_too_many_panic(thread);
	}
	rw_locks_held->rwld_locks_acquired++;

	if (type == LCK_RW_TYPE_EXCLUSIVE) {
		if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
			//array is full
			rw_locks_held->rwld_overflow = 1;
			return;
		}
		null_entry = find_empty_slot(rw_locks_held);
		null_entry->rwlde_lock = lock;
		set_rwlde_caller_packed(null_entry, caller);
		/* -1 marks exclusive hold */
		null_entry->rwlde_mode_count = -1;
		rw_locks_held->rwld_locks_saved++;
		return;
	} else {
		if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
			//array is empty
			goto add_shared;
		}

		boolean_t allow_shared_recursive;
		if (lck_rw_recursive_shared_assert_74048094) {
			/* strict mode: shared recursion only on reader-priority locks */
			allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
		} else {
			allow_shared_recursive = TRUE;
		}
		if (allow_shared_recursive) {
			//It could be already locked in shared mode
			struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
			if (entry != NULL) {
				assert(entry->rwlde_mode_count > 0);
				assertf(entry->rwlde_mode_count != INT8_MAX,
				    "RW lock %p with too many recursive shared held "
				    "from %p caller %p read %d state 0x%x owner 0x%p",
				    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
				    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
				entry->rwlde_mode_count += 1;
				return;
			}
		}

		//none of the locks were a match
		//try to add a new entry
		if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
			//array is full
			rw_locks_held->rwld_overflow = 1;
			return;
		}

add_shared:
		null_entry = find_empty_slot(rw_locks_held);
		null_entry->rwlde_lock = lock;
		set_rwlde_caller_packed(null_entry, caller);
		/* first shared hold of this lock */
		null_entry->rwlde_mode_count = 1;
		rw_locks_held->rwld_locks_saved++;
	}
}
542
/*
 * Record that `thread` released `lock` (mode `type`): decrement the
 * acquired counter and clear or decrement the matching debug entry.
 * A missing entry is tolerated only when the array had overflowed.
 * The overflow flag resets once the thread holds no rw locks at all.
 */
static inline void
remove_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
		return;
	}
	rw_locks_held->rwld_locks_acquired--;

	if (rw_locks_held->rwld_locks_saved == 0) {
		/* nothing recorded: legal only after an overflow */
		assert(rw_locks_held->rwld_overflow == 1);
		goto out;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__probable(entry != NULL)) {
		if (type == LCK_RW_TYPE_EXCLUSIVE) {
			assert(entry->rwlde_mode_count == -1);
			entry->rwlde_mode_count = 0;
		} else {
			assert(entry->rwlde_mode_count > 0);
			entry->rwlde_mode_count--;
			if (entry->rwlde_mode_count > 0) {
				/* still held shared recursively: keep the entry */
				goto out;
			}
		}
		/* fully released: free the slot */
		entry->rwlde_caller_packed = 0;
		entry->rwlde_lock = NULL;
		rw_locks_held->rwld_locks_saved--;
	} else {
		assert(rw_locks_held->rwld_overflow == 1);
	}

out:
	if (rw_locks_held->rwld_locks_acquired == 0) {
		rw_locks_held->rwld_overflow = 0;
	}
	return;
}
587 #endif /* DEBUG_RW */
588
589 /*
590 * We disable interrupts while holding the RW interlock to prevent an
591 * interrupt from exacerbating hold time.
592 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
593 */
/*
 * Disable interrupts, then take the lock's interlock bit.
 * Returns the previous interrupt state for lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(
	lck_rw_t        *lck)
{
	boolean_t       istate;

	istate = ml_set_interrupts_enabled(FALSE);
	lck_rw_ilk_lock(lck);
	return istate;
}
604
/*
 * Release the interlock bit, then restore the interrupt state
 * previously returned by lck_interlock_lock().
 */
static inline void
lck_interlock_unlock(
	lck_rw_t        *lck,
	boolean_t       istate)
{
	lck_rw_ilk_unlock(lck);
	ml_set_interrupts_enabled(istate);
}
613
/*
 * Bump the per-thread count of rw locks held.
 */
static inline void
lck_rw_inc_thread_count(
	thread_t        thread)
{
	__assert_only uint32_t prev_rwlock_count;

	prev_rwlock_count = thread->rwlock_count++;
#if MACH_ASSERT
	/*
	 * Set the ast to check that the
	 * rwlock_count is going to be set to zero when
	 * going back to userspace.
	 * Set it only once when we increment it for the first time.
	 */
	if (prev_rwlock_count == 0) {
		act_set_debug_assert();
	}
#endif
}
633
634 /*
635 * compute the deadline to spin against when
636 * waiting for a change of state on a lck_rw_t
637 */
/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(
	lck_rw_t        *lck)
{
	lck_rw_word_t   word;

	word.data = ordered_load_rw(lck);
	if (word.can_sleep) {
		if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up so we will not bother spinning at this time...
			 * or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return mach_absolute_time();
		}
		return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
	} else {
		/* spin-only lock: effectively unbounded deadline */
		return mach_absolute_time() + (100000LL * 1000000000LL);
	}
}
664
665 /*
666 * This inline is used when busy-waiting for an rw lock.
667 * If interrupts were disabled when the lock primitive was called,
668 * we poll the IPI handler for pending tlb flushes in x86.
669 */
/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * we poll the IPI handler for pending tlb flushes in x86.
 */
static inline void
lck_rw_lock_pause(
	boolean_t interrupts_enabled)
{
#if X86_64
	if (!interrupts_enabled) {
		handle_pending_TLB_flushes();
	}
	cpu_pause();
#else
	/* non-x86: sleep until the exclusive monitor observes a store */
	(void) interrupts_enabled;
	wait_for_event();
#endif
}
684
/* Outcome of lck_rw_drain_status(). */
typedef enum __enum_closed {
	LCK_RW_DRAIN_S_DRAINED = 0,             /* the status bits were observed clear */
	LCK_RW_DRAIN_S_NOT_DRAINED = 1,         /* bits still set; caller asked not to wait */
	LCK_RW_DRAIN_S_EARLY_RETURN = 2,        /* lock_pause() requested an early bail-out */
	LCK_RW_DRAIN_S_TIMED_OUT = 3,           /* spin deadline expired */
} lck_rw_drain_state_t;
691
/*
 * Poll (and optionally spin) until the bits in `status_mask` clear in
 * the lock word. If `wait` is true, spins up to the deadline computed by
 * lck_rw_deadline_for_spin(); `lock_pause` (may be NULL) lets the caller
 * abort the spin early.
 */
static lck_rw_drain_state_t
lck_rw_drain_status(
	lck_rw_t        *lock,
	uint32_t        status_mask,
	boolean_t       wait,
	bool            (^lock_pause)(void))
{
	uint64_t        deadline = 0;
	uint32_t        data;
	boolean_t       istate = FALSE;

	if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
#if __x86_64__
		istate = ml_get_interrupts_enabled();
#endif
	}

	for (;;) {
#if __x86_64__
		data = os_atomic_load(&lock->lck_rw_data, relaxed);
#else
		/* load-exclusive so wait_for_event() in lck_rw_lock_pause()
		 * wakes when the lock word changes */
		data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
#endif
		if ((data & status_mask) == 0) {
			atomic_exchange_abort();
			return LCK_RW_DRAIN_S_DRAINED;
		}

		if (!wait) {
			atomic_exchange_abort();
			return LCK_RW_DRAIN_S_NOT_DRAINED;
		}

		lck_rw_lock_pause(istate);

		if (mach_absolute_time() >= deadline) {
			return LCK_RW_DRAIN_S_TIMED_OUT;
		}

		if (lock_pause && lock_pause()) {
			return LCK_RW_DRAIN_S_EARLY_RETURN;
		}
	}
}
737
738 /*
739 * Spin while interlock is held.
740 */
/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(
	lck_rw_t        *lock)
{
	uint32_t        data, prev;

	for (;;) {
		/* exclusive load so wait_for_event() can sleep until a store */
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
		if (data & LCK_RW_INTERLOCK) {
#if __x86_64__
			cpu_pause();
#else
			wait_for_event();
#endif
		} else {
			/* interlock released: drop the reservation and return */
			atomic_exchange_abort();
			return;
		}
	}
}
761
#define LCK_RW_GRAB_WANT        0
#define LCK_RW_GRAB_SHARED      1

/* How lck_rw_grab() should attempt to take the lock. */
typedef enum __enum_closed __enum_options {
	LCK_RW_GRAB_F_SHARED    = 0x0, // Not really a flag obviously but makes call sites more readable.
	LCK_RW_GRAB_F_WANT_EXCL = 0x1, /* set LCK_RW_WANT_EXCL rather than adding a reader */
	LCK_RW_GRAB_F_WAIT      = 0x2, /* spin until the deadline instead of failing fast */
} lck_rw_grab_flags_t;

/* Outcome of lck_rw_grab(). */
typedef enum __enum_closed {
	LCK_RW_GRAB_S_NOT_LOCKED = 0,           /* not taken; caller asked not to wait */
	LCK_RW_GRAB_S_LOCKED = 1,               /* lock (or want-excl bit) acquired */
	LCK_RW_GRAB_S_EARLY_RETURN = 2,         /* lock_pause() requested an early bail-out */
	LCK_RW_GRAB_S_TIMED_OUT = 3,            /* spin deadline expired */
} lck_rw_grab_state_t;
777
/*
 * Attempt to acquire `lock`: with LCK_RW_GRAB_F_WANT_EXCL, try to set the
 * want-exclusive bit; otherwise try to add a shared reader. With
 * LCK_RW_GRAB_F_WAIT, spin until the deadline computed by
 * lck_rw_deadline_for_spin(); `lock_pause` (may be NULL) lets the caller
 * abort the spin early.
 */
static lck_rw_grab_state_t
lck_rw_grab(
	lck_rw_t        *lock,
	lck_rw_grab_flags_t flags,
	bool            (^lock_pause)(void))
{
	uint64_t        deadline = 0;
	uint32_t        data, prev;
	boolean_t       do_exch, istate = FALSE;

	assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);

	if ((flags & LCK_RW_GRAB_F_WAIT) != 0) {
		deadline = lck_rw_deadline_for_spin(lock);
#if __x86_64__
		istate = ml_get_interrupts_enabled();
#endif
	}

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			/* interlock held: back off and retry once it clears */
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		do_exch = FALSE;
		if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
			if ((data & LCK_RW_WANT_EXCL) == 0) {
				data |= LCK_RW_WANT_EXCL;
				do_exch = TRUE;
			}
		} else { // LCK_RW_GRAB_F_SHARED
			/* readers may enter when no writer wants in, or when other
			 * readers are present and the lock is reader-priority */
			if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
			    (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
				data += LCK_RW_SHARED_READER;
				do_exch = TRUE;
			}
		}
		if (do_exch) {
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return LCK_RW_GRAB_S_LOCKED;
			}
			/* CAS lost a race: loop and retry */
		} else {
			if ((flags & LCK_RW_GRAB_F_WAIT) == 0) {
				atomic_exchange_abort();
				return LCK_RW_GRAB_S_NOT_LOCKED;
			}

			lck_rw_lock_pause(istate);

			if (mach_absolute_time() >= deadline) {
				return LCK_RW_GRAB_S_TIMED_OUT;
			}
			if (lock_pause && lock_pause()) {
				return LCK_RW_GRAB_S_EARLY_RETURN;
			}
		}
	}
}
838
839 /*
840 * The inverse of lck_rw_grab - drops either the LCK_RW_WANT_EXCL bit or
841 * decrements the reader count. Doesn't deal with waking up waiters - i.e.
842 * should only be called when can_sleep is false.
843 */
/*
 * The inverse of lck_rw_grab - drops either the LCK_RW_WANT_EXCL bit or
 * decrements the reader count. Doesn't deal with waking up waiters - i.e.
 * should only be called when can_sleep is false.
 */
static void
lck_rw_drop(lck_rw_t *lock, lck_rw_grab_flags_t flags)
{
	uint32_t        data, prev;

	assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
	assert(!lock->lck_rw_can_sleep);

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);

		/* Interlock should never be taken when can_sleep is false. */
		assert3u(data & LCK_RW_INTERLOCK, ==, 0);

		if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
			data &= ~LCK_RW_WANT_EXCL;
		} else {
			data -= LCK_RW_SHARED_READER;
		}

		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}

		/* CAS lost a race: pause briefly and retry */
		cpu_pause();
	}

	return;
}
873
874 static boolean_t
875 lck_rw_lock_exclusive_gen(
876 lck_rw_t *lock,
877 bool (^lock_pause)(void))
878 {
879 __assert_only thread_t self = current_thread();
880 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
881 lck_rw_word_t word;
882 int slept = 0;
883 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
884 lck_rw_drain_state_t drain_state = LCK_RW_DRAIN_S_NOT_DRAINED;
885 wait_result_t res = 0;
886 boolean_t istate;
887
888 #if CONFIG_DTRACE
889 boolean_t dtrace_ls_initialized = FALSE;
890 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
891 uint64_t wait_interval = 0;
892 int readers_at_sleep = 0;
893 #endif
894
895 assertf(lock->lck_rw_owner != self->ctid,
896 "Lock already held state=0x%x, owner=%p",
897 ordered_load_rw(lock), self);
898
899 #ifdef DEBUG_RW
900 /*
901 * Best effort attempt to check that this thread
902 * is not already holding the lock (this checks read mode too).
903 */
904 assert_canlock_rwlock(lock, self, LCK_RW_TYPE_EXCLUSIVE);
905 #endif /* DEBUG_RW */
906
907 /*
908 * Try to acquire the lck_rw_want_excl bit.
909 */
910 while (lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL, NULL) != LCK_RW_GRAB_S_LOCKED) {
911 #if CONFIG_DTRACE
912 if (dtrace_ls_initialized == FALSE) {
913 dtrace_ls_initialized = TRUE;
914 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
915 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
916 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
917 if (dtrace_ls_enabled) {
918 /*
919 * Either sleeping or spinning is happening,
920 * start a timing of our delay interval now.
921 */
922 readers_at_sleep = lock->lck_rw_shared_count;
923 wait_interval = mach_absolute_time();
924 }
925 }
926 #endif
927
928 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START,
929 trace_lck, 0, 0, 0, 0);
930
931 grab_state = lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT, lock_pause);
932
933 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END,
934 trace_lck, 0, 0, grab_state, 0);
935
936 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
937 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
938 break;
939 }
940 /*
941 * if we get here, the deadline has expired w/o us
942 * being able to grab the lock exclusively
943 * check to see if we're allowed to do a thread_block
944 */
945 word.data = ordered_load_rw(lock);
946 if (word.can_sleep) {
947 istate = lck_interlock_lock(lock);
948 word.data = ordered_load_rw(lock);
949
950 if (word.want_excl) {
951 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
952
953 word.w_waiting = 1;
954 ordered_store_rw(lock, word.data);
955
956 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
957 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
958 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
959 lck_interlock_unlock(lock, istate);
960 if (res == THREAD_WAITING) {
961 res = thread_block(THREAD_CONTINUE_NULL);
962 slept++;
963 }
964 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
965 } else {
966 word.want_excl = 1;
967 ordered_store_rw(lock, word.data);
968 lck_interlock_unlock(lock, istate);
969 break;
970 }
971 }
972 }
973
974 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
975 assert(lock_pause);
976 return FALSE;
977 }
978
979 /*
980 * Wait for readers (and upgrades) to finish...
981 */
982 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
983 #if CONFIG_DTRACE
984 /*
985 * Either sleeping or spinning is happening, start
986 * a timing of our delay interval now. If we set it
987 * to -1 we don't have accurate data so we cannot later
988 * decide to record a dtrace spin or sleep event.
989 */
990 if (dtrace_ls_initialized == FALSE) {
991 dtrace_ls_initialized = TRUE;
992 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
993 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
994 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
995 if (dtrace_ls_enabled) {
996 /*
997 * Either sleeping or spinning is happening,
998 * start a timing of our delay interval now.
999 */
1000 readers_at_sleep = lock->lck_rw_shared_count;
1001 wait_interval = mach_absolute_time();
1002 }
1003 }
1004 #endif
1005
1006 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1007
1008 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE, lock_pause);
1009
1010 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, drain_state, 0);
1011
1012 if (drain_state == LCK_RW_DRAIN_S_DRAINED ||
1013 drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1014 break;
1015 }
1016 /*
1017 * if we get here, the deadline has expired w/o us
1018 * being able to grab the lock exclusively
1019 * check to see if we're allowed to do a thread_block
1020 */
1021 word.data = ordered_load_rw(lock);
1022 if (word.can_sleep) {
1023 istate = lck_interlock_lock(lock);
1024 word.data = ordered_load_rw(lock);
1025
1026 if (word.shared_count != 0 || word.want_upgrade) {
1027 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1028
1029 word.w_waiting = 1;
1030 ordered_store_rw(lock, word.data);
1031
1032 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1033 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1034 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1035 lck_interlock_unlock(lock, istate);
1036
1037 if (res == THREAD_WAITING) {
1038 res = thread_block(THREAD_CONTINUE_NULL);
1039 slept++;
1040 }
1041 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1042 } else {
1043 lck_interlock_unlock(lock, istate);
1044 /*
1045 * must own the lock now, since we checked for
1046 * readers or upgrade owner behind the interlock
1047 * no need for a call to 'lck_rw_drain_status'
1048 */
1049 break;
1050 }
1051 }
1052 }
1053
1054 #if CONFIG_DTRACE
1055 /*
1056 * Decide what latencies we suffered that are Dtrace events.
1057 * If we have set wait_interval, then we either spun or slept.
1058 * At least we get out from under the interlock before we record
1059 * which is the best we can do here to minimize the impact
1060 * of the tracing.
1061 * If we have set wait_interval to -1, then dtrace was not enabled when we
1062 * started sleeping/spinning so we don't record this event.
1063 */
1064 if (dtrace_ls_enabled == TRUE) {
1065 if (slept == 0) {
1066 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1067 mach_absolute_time() - wait_interval, 1);
1068 } else {
1069 /*
1070 * For the blocking case, we also record if when we blocked
1071 * it was held for read or write, and how many readers.
1072 * Notice that above we recorded this before we dropped
1073 * the interlock so the count is accurate.
1074 */
1075 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1076 mach_absolute_time() - wait_interval, 1,
1077 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1078 }
1079 }
1080 #endif /* CONFIG_DTRACE */
1081
1082 if (drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1083 lck_rw_drop(lock, LCK_RW_GRAB_F_WANT_EXCL);
1084 assert(lock_pause);
1085 return FALSE;
1086 }
1087
1088 #if CONFIG_DTRACE
1089 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1090 #endif /* CONFIG_DTRACE */
1091
1092 return TRUE;
1093 }
1094
/*
 * Fast-path exclusive acquire: atomically set LCK_RW_WANT_EXCL in
 * lck_rw_data, failing if any of the tested bits (shared readers,
 * want-excl, want-upgrade, or the interlock) are already set.
 * NOTE(review): exact success/failure semantics follow
 * atomic_test_and_set32() -- confirm against its definition.
 */
#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
    (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
    LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
1098 /*!
1099 * @function lck_rw_lock_exclusive_check_contended
1100 *
1101 * @abstract
1102 * Locks a rw_lock in exclusive mode.
1103 *
1104 * @discussion
1105 * This routine IS EXPERIMENTAL.
1106 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
1107 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
1108 *
1109 * @param lock rw_lock to lock.
1110 *
1111 * @returns Returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
1112 * otherwise.
1113 */
bool
lck_rw_lock_exclusive_check_contended(
	lck_rw_t        *lock)
{
	thread_t        thread = current_thread();
	bool            contended  = false;

	/*
	 * Sleepable locks are accounted against the thread's rwlock count;
	 * a non-sleepable (spin) lock may only be taken with preemption
	 * already disabled.
	 */
	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	/* Fast path: try to atomically claim LCK_RW_WANT_EXCL. */
	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
	} else {
		/*
		 * Fast path failed: report contention (heuristic only, per the
		 * function doc) and take the generic spin/block slow path.
		 */
		contended = true;
		(void) lck_rw_lock_exclusive_gen(lock, NULL);
	}
	/* Lock is now held exclusively; record this thread as the owner. */
	assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
	    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
	ordered_store_rw_owner(lock, thread->ctid);

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
#endif /* DEBUG_RW */
	return contended;
}
1144
/*
 * Common body for all exclusive-lock entry points.
 *
 * Takes `lock` exclusively; `caller` is the return address used for
 * DEBUG_RW held-lock tracking.  `lock_pause` (may be NULL) lets the
 * slow path give up early; in that case FALSE is returned and the
 * lock is NOT held.  Returns TRUE once the lock is held exclusively.
 */
__attribute__((always_inline))
static boolean_t
lck_rw_lock_exclusive_internal_inline(
	lck_rw_t        *lock,
	void            *caller,
	bool          (^lock_pause)(void))
{
#pragma unused(caller)
	thread_t        thread = current_thread();

	/* Sleepable locks are counted; spin locks require preemption disabled. */
	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	/* Fast path: atomically claim LCK_RW_WANT_EXCL. */
	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
	} else if (!lck_rw_lock_exclusive_gen(lock, lock_pause)) {
		/*
		 * lck_rw_lock_exclusive_gen() should only return
		 * early if lock_pause has been passed and
		 * returns FALSE. lock_pause is exclusive with
		 * lck_rw_can_sleep().
		 */
		assert(!lock->lck_rw_can_sleep);
		return FALSE;
	}

	/* Lock acquired: record this thread as the exclusive owner. */
	assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
	    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
	ordered_store_rw_owner(lock, thread->ctid);

#if DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
#endif /* DEBUG_RW */

	return TRUE;
}
1186
/*
 * Out-of-line wrapper used by lck_rw_lock() so the inline body is not
 * duplicated at that call site.  No lock_pause block is passed, so this
 * variant never returns early.
 */
__attribute__((noinline))
static void
lck_rw_lock_exclusive_internal(
	lck_rw_t        *lock,
	void            *caller)
{
	(void) lck_rw_lock_exclusive_internal_inline(lock, caller, NULL);
}
1195
1196 /*!
1197 * @function lck_rw_lock_exclusive
1198 *
1199 * @abstract
1200 * Locks a rw_lock in exclusive mode.
1201 *
1202 * @discussion
1203 * This function can block.
1204 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1205 * can acquire it in exclusive mode.
1206 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1207 *
1208 * @param lock rw_lock to lock.
1209 */
void
lck_rw_lock_exclusive(
	lck_rw_t        *lock)
{
	/* Pass our caller's return address for DEBUG_RW held-lock tracking. */
	(void) lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), NULL);
}
1216
1217 /*!
1218 * @function lck_rw_lock_exclusive_b
1219 *
1220 * @abstract
1221 * Locks a rw_lock in exclusive mode. Returns early if the lock can't be acquired
1222 * and the specified block returns true.
1223 *
1224 * @discussion
1225 * Identical to lck_rw_lock_exclusive() but can return early if the lock can't be
1226 * acquired and the specified block returns true. The block is called
1227 * repeatedly when waiting to acquire the lock.
1228 * Should only be called when the lock cannot sleep (i.e. when
1229 * lock->lck_rw_can_sleep is false).
1230 *
1231 * @param lock rw_lock to lock.
1232 * @param lock_pause block invoked while waiting to acquire lock
1233 *
1234 * @returns Returns TRUE if the lock is successfully taken,
1235 * FALSE if the block returns true and the lock has
1236 * not been acquired.
1237 */
boolean_t
lck_rw_lock_exclusive_b(
	lck_rw_t        *lock,
	bool          (^lock_pause)(void))
{
	/* lock_pause is only supported on non-sleepable locks (see doc above). */
	assert(!lock->lck_rw_can_sleep);

	return lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), lock_pause);
}
1247
1248 /*
1249 * Routine: lck_rw_lock_shared_gen
1250 * Function:
1251 * Fast path code has determined that this lock
1252 * is held exclusively... this is where we spin/block
1253 * until we can acquire the lock in the shared mode
1254 */
static boolean_t
lck_rw_lock_shared_gen(
	lck_rw_t        *lck,
	bool          (^lock_pause)(void))
{
	__assert_only thread_t  self = current_thread();
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	lck_rw_word_t           word;
	lck_rw_grab_state_t     grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
	int                     slept = 0;
	wait_result_t           res = 0;
	boolean_t               istate;

#if CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif /* CONFIG_DTRACE */

	/* The exclusive owner must not try to take its own lock shared. */
	assertf(lck->lck_rw_owner != self->ctid,
	    "Lock already held state=0x%x, owner=%p",
	    ordered_load_rw(lck), self);

#ifdef DEBUG_RW
	/*
	 * Best effort attempt to check that this thread
	 * is not already holding the lock in shared mode.
	 */
	assert_canlock_rwlock(lck, self, LCK_RW_TYPE_SHARED);
#endif

	/* Spin (and, if allowed, block) until a shared grab succeeds. */
	while (lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED, NULL) != LCK_RW_GRAB_S_LOCKED) {
#if CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
			dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
		    trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);

		/* Waiting grab: returns LOCKED, EARLY_RETURN (lock_pause), or a timeout. */
		grab_state = lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED | LCK_RW_GRAB_F_WAIT, lock_pause);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
		    trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, grab_state, 0);

		if (grab_state == LCK_RW_GRAB_S_LOCKED ||
		    grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
			break;
		}

		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock for read
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {
			istate = lck_interlock_lock(lck);

			word.data = ordered_load_rw(lck);
			if ((word.want_excl || word.want_upgrade) &&
			    ((word.shared_count == 0) || word.priv_excl)) {
				/* A writer has priority over us: flag a waiting reader and block. */
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
				    trace_lck, word.want_excl, word.want_upgrade, 0, 0);

				word.r_waiting = 1;
				ordered_store_rw(lck, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
				res = assert_wait(LCK_RW_READER_EVENT(lck),
				    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
				    trace_lck, res, slept, 0, 0);
			} else {
				/* No writer in the way: take a read reference under the interlock. */
				word.shared_count++;
				ordered_store_rw(lck, word.data);
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}

#if CONFIG_DTRACE
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
#endif /* CONFIG_DTRACE */

	/* lock_pause asked us to bail out: the lock was NOT taken. */
	if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
		assert(lock_pause);
		return FALSE;
	}

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif /* CONFIG_DTRACE */

	return TRUE;
}
1378
/*
 * Common body for all shared-lock entry points.
 *
 * Fast path: CAS-increment the reader count when no writer bits are set.
 * Otherwise falls back to lck_rw_lock_shared_gen().  `caller` is the
 * return address used for DEBUG_RW tracking; `lock_pause` (may be NULL)
 * lets the slow path give up early, returning FALSE with the lock NOT
 * held.  Returns TRUE once the lock is held shared.
 */
__attribute__((always_inline))
static boolean_t
lck_rw_lock_shared_internal_inline(
	lck_rw_t        *lock,
	void            *caller,
	bool          (^lock_pause)(void))
{
#pragma unused(caller)

	uint32_t        data, prev;
	thread_t        thread = current_thread();
#ifdef DEBUG_RW
	boolean_t       check_canlock = TRUE;
#endif

	/* Sleepable locks are counted; spin locks require preemption disabled. */
	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
			/* A writer (or the interlock) is in the way: take the slow path. */
			atomic_exchange_abort();
			if (!lck_rw_lock_shared_gen(lock, lock_pause)) {
				/*
				 * lck_rw_lock_shared_gen() should only return
				 * early if lock_pause has been passed and
				 * returns FALSE. lock_pause is exclusive with
				 * lck_rw_can_sleep().
				 */
				assert(!lock->lck_rw_can_sleep);
				return FALSE;
			}

			goto locked;
		}
#ifdef DEBUG_RW
		if ((data & LCK_RW_SHARED_MASK) == 0) {
			/*
			 * If the lock is uncontended,
			 * we do not need to check if we can lock it
			 */
			check_canlock = FALSE;
		}
#endif
		data += LCK_RW_SHARED_READER;	/* take one read reference */
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();	/* CAS lost a race; retry */
	}
#ifdef DEBUG_RW
	if (check_canlock) {
		/*
		 * Best effort attempt to check that this thread
		 * is not already holding the lock (this checks read mode too).
		 */
		assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
	}
#endif
locked:
	/* Shared holders never set an owner; the field must be clear. */
	assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
	    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
#endif /* DEBUG_RW */

	return TRUE;
}
1455
/*
 * Out-of-line wrapper used by lck_rw_lock() so the inline body is not
 * duplicated at that call site.  No lock_pause block is passed, so this
 * variant never returns early.
 */
__attribute__((noinline))
static void
lck_rw_lock_shared_internal(
	lck_rw_t        *lock,
	void            *caller)
{
	(void) lck_rw_lock_shared_internal_inline(lock, caller, NULL);
}
1464
1465 /*!
1466 * @function lck_rw_lock_shared
1467 *
1468 * @abstract
1469 * Locks a rw_lock in shared mode.
1470 *
1471 * @discussion
1472 * This function can block.
1473 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1474 * can acquire it in exclusive mode.
1475 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
1476 * the lock without waiting.
1477 * If the lock is held in shared mode and there is at least a writer waiting, a reader will wait
1478 * for all the writers to make progress if the lock was initialized with the default settings. Instead if
1479 * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
1480 * in shared mode.
1481 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1482 *
1483 * @param lock rw_lock to lock.
1484 */
void
lck_rw_lock_shared(
	lck_rw_t        *lock)
{
	/* Pass our caller's return address for DEBUG_RW held-lock tracking. */
	(void) lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), NULL);
}
1491
1492 /*!
1493 * @function lck_rw_lock_shared_b
1494 *
1495 * @abstract
1496 * Locks a rw_lock in shared mode. Returns early if the lock can't be acquired
1497 * and the specified block returns true.
1498 *
1499 * @discussion
1500 * Identical to lck_rw_lock_shared() but can return early if the lock can't be
1501 * acquired and the specified block returns true. The block is called
1502 * repeatedly when waiting to acquire the lock.
1503 * Should only be called when the lock cannot sleep (i.e. when
1504 * lock->lck_rw_can_sleep is false).
1505 *
1506 * @param lock rw_lock to lock.
1507 * @param lock_pause block invoked while waiting to acquire lock
1508 *
1509 * @returns Returns TRUE if the lock is successfully taken,
1510 * FALSE if the block returns true and the lock has
1511 * not been acquired.
1512 */
boolean_t
lck_rw_lock_shared_b(
	lck_rw_t        *lock,
	bool          (^lock_pause)(void))
{
	/* lock_pause is only supported on non-sleepable locks (see doc above). */
	assert(!lock->lck_rw_can_sleep);

	return lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), lock_pause);
}
1522
1523 /*
1524 * Routine: lck_rw_lock_shared_to_exclusive_failure
1525 * Function:
1526 * Fast path code has already dropped our read
1527 * count and determined that someone else owns 'lck_rw_want_upgrade'
 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
1529 * all we need to do here is determine if a wakeup is needed
1530 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t        *lck,
	uint32_t        prior_lock_state)
{
	thread_t        thread = current_thread();
	uint32_t        rwlock_count;

	if ((prior_lock_state & LCK_RW_W_WAITING) &&
	    ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
		/*
		 * Someone else has requested upgrade.
		 * Since we've released the read lock, wake
		 * him up if he's blocked waiting
		 */
		thread_wakeup(LCK_RW_WRITER_EVENT(lck));
	}

	/* Check if dropping the lock means that we need to unpromote */
	if (lck->lck_rw_can_sleep) {
		rwlock_count = thread->rwlock_count--;	/* post-decrement: old count */
	} else {
		/* Non-sleepable locks are not counted; sentinel skips unpromote. */
		rwlock_count = UINT32_MAX;
	}

	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}

	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

#ifdef DEBUG_RW
	remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */

	/* Upgrade failed; the caller no longer holds the lock at all. */
	return FALSE;
}
1574
1575 /*
1576 * Routine: lck_rw_lock_shared_to_exclusive_success
1577 * Function:
1578 * the fast path code has already dropped our read
1579 * count and successfully acquired 'lck_rw_want_upgrade'
1580 * we just need to wait for the rest of the readers to drain
1581 * and then we can return as the exclusive holder of this lock
1582 */
static void
lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t        *lock)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	int                     slept = 0;
	lck_rw_word_t           word;
	wait_result_t           res;
	boolean_t               istate;
	lck_rw_drain_state_t    drain_state;

#if CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

	/* Spin (and, if allowed, block) until the remaining readers drain. */
	while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
		word.data = ordered_load_rw(lock);
#if CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
			dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = word.shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
		    trace_lck, word.shared_count, 0, 0, 0);

		/* Waiting drain (no lock_pause: upgrades cannot return early). */
		drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE, NULL);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
		    trace_lck, lock->lck_rw_shared_count, 0, 0, 0);

		if (drain_state == LCK_RW_DRAIN_S_DRAINED) {
			break;
		}

		/*
		 * if we get here, the spin deadline in lck_rw_wait_on_status()
		 * has expired w/o the rw_shared_count having drained to 0
		 * check to see if we're allowed to do a thread_block
		 */
		if (word.can_sleep) {
			istate = lck_interlock_lock(lock);

			word.data = ordered_load_rw(lock);
			if (word.shared_count != 0) {
				/* Readers still present: flag a waiting writer and block. */
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
				    trace_lck, word.shared_count, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock),
				    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
				    trace_lck, res, slept, 0, 0);
			} else {
				/* Readers drained while we held the interlock: we own the lock. */
				lck_interlock_unlock(lock, istate);
				break;
			}
		}
	}
#if CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
#endif
}
1681
1682 /*!
1683 * @function lck_rw_lock_shared_to_exclusive
1684 *
1685 * @abstract
1686 * Upgrades a rw_lock held in shared mode to exclusive.
1687 *
1688 * @discussion
1689 * This function can block.
 * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails the function will
 * return with the lock not held.
1692 * The caller needs to hold the lock in shared mode to upgrade it.
1693 *
1694 * @param lock rw_lock already held in shared mode to upgrade.
1695 *
1696 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
1697 * If the function was not able to upgrade the lock, the lock will be dropped
1698 * by the function.
1699 */
boolean_t
lck_rw_lock_shared_to_exclusive(
	lck_rw_t        *lock)
{
	thread_t        thread = current_thread();
	uint32_t        data, prev;

	/* Upgrades are only supported on writer-priority (priv_excl) locks. */
	assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());

#if DEBUG_RW
	assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);	/* wait for interlock to clear */
			continue;
		}
		if (data & LCK_RW_WANT_UPGRADE) {
			/* Another reader beat us to the upgrade: drop our read ref and fail. */
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0) {         /* we were the last reader */
				data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
			}
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
			}
		} else {
			data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
			data -= LCK_RW_SHARED_READER;           /* and shed our read count */
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				break;
			}
		}
		cpu_pause();	/* CAS lost a race; retry */
	}
	/* we now own the WANT_UPGRADE */
	if (data & LCK_RW_SHARED_MASK) {                        /* check to see if all of the readers are drained */
		lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
	}

	/* Lock is now exclusive; record this thread as the owner. */
	assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
	    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));

	ordered_store_rw_owner(lock, thread->ctid);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif /* CONFIG_DTRACE */

#if DEBUG_RW
	change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
#endif /* DEBUG_RW */
	return TRUE;
}
1755
1756 /*
1757 * Routine: lck_rw_lock_exclusive_to_shared_gen
1758 * Function:
1759 * Fast path has already dropped
1760 * our exclusive state and bumped lck_rw_shared_count
1761 * all we need to do here is determine if anyone
1762 * needs to be awakened.
1763 */
1764 static void
lck_rw_lock_exclusive_to_shared_gen(lck_rw_t * lck,uint32_t prior_lock_state,void * caller)1765 lck_rw_lock_exclusive_to_shared_gen(
1766 lck_rw_t *lck,
1767 uint32_t prior_lock_state,
1768 void *caller)
1769 {
1770 #pragma unused(caller)
1771 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1772 lck_rw_word_t fake_lck;
1773
1774 /*
1775 * prior_lock state is a snapshot of the 1st word of the
1776 * lock in question... we'll fake up a pointer to it
1777 * and carefully not access anything beyond whats defined
1778 * in the first word of a lck_rw_t
1779 */
1780 fake_lck.data = prior_lock_state;
1781
1782 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1783 trace_lck, fake_lck->want_excl, fake_lck->want_upgrade, 0, 0);
1784
1785 /*
1786 * don't wake up anyone waiting to take the lock exclusively
1787 * since we hold a read count... when the read count drops to 0,
1788 * the writers will be woken.
1789 *
1790 * wake up any waiting readers if we don't have any writers waiting,
1791 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1792 */
1793 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1794 thread_wakeup(LCK_RW_READER_EVENT(lck));
1795 }
1796
1797 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1798 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1799
1800 #if CONFIG_DTRACE
1801 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1802 #endif
1803
1804 #if DEBUG_RW
1805 thread_t thread = current_thread();
1806 change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1807 #endif /* DEBUG_RW */
1808 }
1809
1810 /*!
1811 * @function lck_rw_lock_exclusive_to_shared
1812 *
1813 * @abstract
1814 * Downgrades a rw_lock held in exclusive mode to shared.
1815 *
1816 * @discussion
1817 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
1818 *
1819 * @param lock rw_lock already held in exclusive mode to downgrade.
1820 */
void
lck_rw_lock_exclusive_to_shared(
	lck_rw_t        *lock)
{
	uint32_t        data, prev;

	/* Caller must be the recorded exclusive owner; clear ownership first. */
	assertf(lock->lck_rw_owner == current_thread()->ctid,
	    "state=0x%x, owner=%p", lock->lck_rw_data,
	    ctid_get_thread_unsafe(lock->lck_rw_owner));
	ordered_store_rw_owner(lock, 0);

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
			continue;
		}
		/* Become a reader, dropping whichever exclusive bit we held. */
		data += LCK_RW_SHARED_READER;
		if (data & LCK_RW_WANT_UPGRADE) {
			data &= ~(LCK_RW_WANT_UPGRADE);
		} else {
			data &= ~(LCK_RW_WANT_EXCL);
		}
		/*
		 * Keep W_WAITING only when writers both are waiting and have
		 * priority (PRIV_EXCL); otherwise readers get woken below.
		 */
		if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
			data &= ~(LCK_RW_W_WAITING);
		}
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
			break;
		}
		cpu_pause();	/* CAS lost a race; retry */
	}
	/* Do wakeups/tracing based on the pre-downgrade state snapshot. */
	lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
}
1855
1856 /*
1857 * Very sad hack, but the codegen for lck_rw_lock
1858 * is very unhappy with the combination of __builtin_return_address()
1859 * and a noreturn function. For some reason it adds more frames
1860 * than it should. rdar://76570684
1861 */
void
_lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-noreturn"
__attribute__((noinline, weak))
void
_lck_rw_lock_type_panic(
	lck_rw_t        *lck,
	lck_rw_type_t   lck_rw_type)
{
	/* Deliberately NOT marked noreturn -- see rdar://76570684 above. */
	panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
}
#pragma clang diagnostic pop
1875
1876 /*!
1877 * @function lck_rw_lock
1878 *
1879 * @abstract
1880 * Locks a rw_lock with the specified type.
1881 *
1882 * @discussion
1883 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1884 *
1885 * @param lck rw_lock to lock.
1886 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1887 */
void
lck_rw_lock(
	lck_rw_t        *lck,
	lck_rw_type_t   lck_rw_type)
{
	/* Dispatch on lock type; anything else is a caller bug. */
	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
		return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
		return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
	}
	_lck_rw_lock_type_panic(lck, lck_rw_type);
}
1900
/*
 * Attempt to take the lock in shared (read) mode without blocking.
 *
 * Returns TRUE and increments the shared reader refcount on success.
 * Returns FALSE immediately if a writer owns the lock or an upgrade is
 * pending. `caller` is consumed only by the DEBUG_RW bookkeeping.
 */
__attribute__((always_inline))
static boolean_t
lck_rw_try_lock_shared_internal_inline(
	lck_rw_t        *lock,
	void            *caller)
{
#pragma unused(caller)

	uint32_t data, prev;
	thread_t thread = current_thread();
#ifdef DEBUG_RW
	boolean_t check_canlock = TRUE;
#endif

	for (;;) {
		/* Snapshot the lock word; the exchange below fails if it changed. */
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			/* Interlock held by another CPU: spin until it clears, retry. */
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			atomic_exchange_abort();
			return FALSE; /* lock is busy */
		}
#ifdef DEBUG_RW
		if ((data & LCK_RW_SHARED_MASK) == 0) {
			/*
			 * If the lock is uncontended,
			 * we do not need to check if we can lock it
			 */
			check_canlock = FALSE;
		}
#endif
		data += LCK_RW_SHARED_READER; /* Increment reader refcount */
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
#ifdef DEBUG_RW
	if (check_canlock) {
		/*
		 * Best effort attempt to check that this thread
		 * is not already holding the lock (this checks read mode too).
		 */
		assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
	}
#endif
	/* A shared-held lock must not have an exclusive owner recorded. */
	assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
	    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));

	if (lock->lck_rw_can_sleep) {
		/* Per-thread count feeds the RW priority-promotion machinery. */
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
#endif /* DEBUG_RW */
	return TRUE;
}
1968
1969 __attribute__((noinline))
1970 static boolean_t
lck_rw_try_lock_shared_internal(lck_rw_t * lock,void * caller)1971 lck_rw_try_lock_shared_internal(
1972 lck_rw_t *lock,
1973 void *caller)
1974 {
1975 return lck_rw_try_lock_shared_internal_inline(lock, caller);
1976 }
1977
/*!
 * @function lck_rw_try_lock_shared
 *
 * @abstract
 * Tries to lock a rw_lock in read mode.
 *
 * @discussion
 * This function will return and not block in case the lock is already held.
 * See lck_rw_lock_shared for more details.
 *
 * @param lock rw_lock to lock.
 *
 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
 */
boolean_t
lck_rw_try_lock_shared(
	lck_rw_t        *lock)
{
	/* Inline path so __builtin_return_address(0) captures the external caller. */
	return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
}
1998
/*
 * Attempt to take the lock in exclusive (write) mode without blocking.
 *
 * Returns TRUE, sets LCK_RW_WANT_EXCL and records the calling thread as
 * owner on success. Returns FALSE immediately if any readers, a writer,
 * or a pending upgrade hold the lock. `caller` is consumed only by the
 * DEBUG_RW bookkeeping.
 */
__attribute__((always_inline))
static boolean_t
lck_rw_try_lock_exclusive_internal_inline(
	lck_rw_t        *lock,
	void            *caller)
{
#pragma unused(caller)
	uint32_t data, prev;

	for (;;) {
		/* Snapshot the lock word; the exchange below fails if it changed. */
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			/* Interlock held by another CPU: spin until it clears, retry. */
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			/* Held by readers, a writer, or an upgrader: give up. */
			atomic_exchange_abort();
			return FALSE;
		}
		data |= LCK_RW_WANT_EXCL;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
	thread_t thread = current_thread();

	if (lock->lck_rw_can_sleep) {
		/* Per-thread count feeds the RW priority-promotion machinery. */
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	/* We just acquired the lock: no owner may be recorded yet. */
	assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
	    ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));

	ordered_store_rw_owner(lock, thread->ctid);
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
#endif /* DEBUG_RW */
	return TRUE;
}
2046
2047 __attribute__((noinline))
2048 static boolean_t
lck_rw_try_lock_exclusive_internal(lck_rw_t * lock,void * caller)2049 lck_rw_try_lock_exclusive_internal(
2050 lck_rw_t *lock,
2051 void *caller)
2052 {
2053 return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
2054 }
2055
/*!
 * @function lck_rw_try_lock_exclusive
 *
 * @abstract
 * Tries to lock a rw_lock in write mode.
 *
 * @discussion
 * This function will return and not block in case the lock is already held.
 * See lck_rw_lock_exclusive for more details.
 *
 * @param lock rw_lock to lock.
 *
 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
 */
boolean_t
lck_rw_try_lock_exclusive(
	lck_rw_t        *lock)
{
	/* Inline path so __builtin_return_address(0) captures the external caller. */
	return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
}
2076
2077 /*
2078 * Very sad hack, but the codegen for lck_rw_try_lock
2079 * is very unhappy with the combination of __builtin_return_address()
2080 * and a noreturn function. For some reason it adds more frames
2081 * than it should. rdar://76570684
2082 */
2083 boolean_t
2084 _lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
2085 #pragma clang diagnostic push
2086 #pragma clang diagnostic ignored "-Wmissing-noreturn"
2087 __attribute__((noinline, weak))
2088 boolean_t
_lck_rw_try_lock_type_panic(lck_rw_t * lck,lck_rw_type_t lck_rw_type)2089 _lck_rw_try_lock_type_panic(
2090 lck_rw_t *lck,
2091 lck_rw_type_t lck_rw_type)
2092 {
2093 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
2094 }
2095 #pragma clang diagnostic pop
2096
/*!
 * @function lck_rw_try_lock
 *
 * @abstract
 * Tries to lock a rw_lock with the specified type.
 *
 * @discussion
 * This function will return and not wait/block in case the lock is already held.
 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
 *
 * @param lck rw_lock to lock.
 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
 *
 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
 */
boolean_t
lck_rw_try_lock(
	lck_rw_t        *lck,
	lck_rw_type_t   lck_rw_type)
{
	/* __builtin_return_address(0) captures the external call site for debug bookkeeping. */
	if (lck_rw_type == LCK_RW_TYPE_SHARED) {
		return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
		return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
	}
	/* Invalid type: out-of-line helper, see codegen note above its definition. */
	return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
}
2124
/*
 * Routine: lck_rw_done_gen
 *
 * prior_lock_state is the value in the 1st
 * word of the lock at the time of a successful
 * atomic compare and exchange with the new value...
 * it represents the state of the lock before we
 * decremented the rw_shared_count or cleared either
 * rw_want_upgrade or rw_want_write and
 * the lck_x_waiting bits... since the wrapper
 * routine has already changed the state atomically,
 * we just need to decide if we should
 * wake up anyone and what value to return... we do
 * this by examining the state of the lock before
 * we changed it
 */
static lck_rw_type_t
lck_rw_done_gen(
	lck_rw_t        *lck,
	uint32_t        prior_lock_state)
{
	lck_rw_word_t   fake_lck;
	lck_rw_type_t   lock_type;
	thread_t        thread;
	uint32_t        rwlock_count;

	/*
	 * prior_lock state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond whats defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck.data = prior_lock_state;

	if (fake_lck.shared_count <= 1) {
		/* We released the last reader (or an exclusive hold): wake waiters. */
		if (fake_lck.w_waiting) {
			thread_wakeup(LCK_RW_WRITER_EVENT(lck));
		}

		/*
		 * Also wake readers, unless the lock prefers writers
		 * (priv_excl) and a writer is waiting.
		 */
		if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
			thread_wakeup(LCK_RW_READER_EVENT(lck));
		}
	}
	/* A non-zero prior reader count means a shared hold was released. */
	if (fake_lck.shared_count) {
		lock_type = LCK_RW_TYPE_SHARED;
	} else {
		lock_type = LCK_RW_TYPE_EXCLUSIVE;
	}

	/* Check if dropping the lock means that we need to unpromote */
	thread = current_thread();
	if (fake_lck.can_sleep) {
		/* Post-decrement: rwlock_count holds the pre-release value. */
		rwlock_count = thread->rwlock_count--;
	} else {
		/* Non-sleepable locks don't participate in promotion accounting. */
		rwlock_count = UINT32_MAX;
	}

	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}

	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	}
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif

#ifdef DEBUG_RW
	remove_held_rwlock(lck, thread, lock_type);
#endif /* DEBUG_RW */
	return lock_type;
}
2199
/*!
 * @function lck_rw_done
 *
 * @abstract
 * Force unlocks a rw_lock without consistency checks.
 *
 * @discussion
 * Do not use unless sure you can avoid consistency checks.
 * Atomically drops one shared reference, or the exclusive/upgrade hold,
 * then delegates waiter wakeups to lck_rw_done_gen().
 *
 * @param lock rw_lock to unlock.
 */
lck_rw_type_t
lck_rw_done(
	lck_rw_t        *lock)
{
	uint32_t data, prev;
	boolean_t once = FALSE;

#ifdef DEBUG_RW
	/*
	 * Best effort attempt to check that this thread
	 * is holding the lock.
	 */
	thread_t thread = current_thread();
	assert_held_rwlock(lock, thread, 0);
#endif /* DEBUG_RW */
	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
		if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
			/* A shared-held lock must not have an exclusive owner recorded. */
			assertf(lock->lck_rw_owner == 0,
			    "state=0x%x, owner=%p", lock->lck_rw_data,
			    ctid_get_thread_unsafe(lock->lck_rw_owner));
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
				goto check_waiters;
			}
		} else { /* if reader count == 0, must be exclusive lock */
			if (data & LCK_RW_WANT_UPGRADE) {
				data &= ~(LCK_RW_WANT_UPGRADE);
			} else {
				if (data & LCK_RW_WANT_EXCL) {
					data &= ~(LCK_RW_WANT_EXCL);
				} else { /* lock is not 'owned', panic */
					panic("Releasing non-exclusive RW lock without a reader refcount!");
				}
			}
			if (!once) {
				/*
				 * Only check for holder and clear it once: the
				 * exchange below may fail and loop back here.
				 */
				assertf(lock->lck_rw_owner == current_thread()->ctid,
				    "state=0x%x, owner=%p", lock->lck_rw_data,
				    ctid_get_thread_unsafe(lock->lck_rw_owner));
				ordered_store_rw_owner(lock, 0);
				once = TRUE;
			}
check_waiters:
			/*
			 * test the original values to match what
			 * lck_rw_done_gen is going to do to determine
			 * which wakeups need to happen...
			 *
			 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
			 */
			if (prev & LCK_RW_W_WAITING) {
				data &= ~(LCK_RW_W_WAITING);
				if ((prev & LCK_RW_PRIV_EXCL) == 0) {
					data &= ~(LCK_RW_R_WAITING);
				}
			} else {
				data &= ~(LCK_RW_R_WAITING);
			}
		}
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
			break;
		}
		cpu_pause();
	}
	/* Wakeups/unpromotion are driven by the pre-release state snapshot. */
	return lck_rw_done_gen(lock, prev);
}
2283
2284 /*!
2285 * @function lck_rw_unlock_shared
2286 *
2287 * @abstract
2288 * Unlocks a rw_lock previously locked in shared mode.
2289 *
2290 * @discussion
2291 * The same thread that locked the lock needs to unlock it.
2292 *
2293 * @param lck rw_lock held in shared mode to unlock.
2294 */
2295 void
lck_rw_unlock_shared(lck_rw_t * lck)2296 lck_rw_unlock_shared(
2297 lck_rw_t *lck)
2298 {
2299 lck_rw_type_t ret;
2300
2301 assertf(lck->lck_rw_owner == 0,
2302 "state=0x%x, owner=%p", lck->lck_rw_data,
2303 ctid_get_thread_unsafe(lck->lck_rw_owner));
2304 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
2305 ret = lck_rw_done(lck);
2306
2307 if (ret != LCK_RW_TYPE_SHARED) {
2308 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
2309 }
2310 }
2311
2312 /*!
2313 * @function lck_rw_unlock_exclusive
2314 *
2315 * @abstract
2316 * Unlocks a rw_lock previously locked in exclusive mode.
2317 *
2318 * @discussion
2319 * The same thread that locked the lock needs to unlock it.
2320 *
2321 * @param lck rw_lock held in exclusive mode to unlock.
2322 */
2323 void
lck_rw_unlock_exclusive(lck_rw_t * lck)2324 lck_rw_unlock_exclusive(
2325 lck_rw_t *lck)
2326 {
2327 lck_rw_type_t ret;
2328
2329 assertf(lck->lck_rw_owner == current_thread()->ctid,
2330 "state=0x%x, owner=%p", lck->lck_rw_data,
2331 ctid_get_thread_unsafe(lck->lck_rw_owner));
2332 ret = lck_rw_done(lck);
2333
2334 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
2335 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
2336 }
2337 }
2338
2339 /*!
2340 * @function lck_rw_unlock
2341 *
2342 * @abstract
2343 * Unlocks a rw_lock previously locked with lck_rw_type.
2344 *
2345 * @discussion
2346 * The lock must be unlocked by the same thread it was locked from.
2347 * The type of the lock/unlock have to match, unless an upgrade/downgrade was performed while
2348 * holding the lock.
2349 *
2350 * @param lck rw_lock to unlock.
2351 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2352 */
2353 void
lck_rw_unlock(lck_rw_t * lck,lck_rw_type_t lck_rw_type)2354 lck_rw_unlock(
2355 lck_rw_t *lck,
2356 lck_rw_type_t lck_rw_type)
2357 {
2358 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2359 lck_rw_unlock_shared(lck);
2360 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2361 lck_rw_unlock_exclusive(lck);
2362 } else {
2363 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
2364 }
2365 }
2366
/*!
 * @function lck_rw_assert
 *
 * @abstract
 * Asserts the rw_lock is held.
 *
 * @discussion
 * read-write locks do not have a concept of ownership when held in shared mode,
 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
 * However if rw_lock_debug is on, a best effort mechanism to track the owners is in place, and
 * this function can be more accurate.
 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD
 * LCK_RW_ASSERT_NOTHELD.
 *
 * Panics if the asserted condition does not hold.
 *
 * @param lck rw_lock to check.
 * @param type assert type
 */
void
lck_rw_assert(
	lck_rw_t        *lck,
	unsigned int    type)
{
	thread_t thread = current_thread();

	switch (type) {
	case LCK_RW_ASSERT_SHARED:
		/* Some reader holds it and no exclusive owner is recorded. */
		if ((lck->lck_rw_shared_count != 0) &&
		    (lck->lck_rw_owner == 0)) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */
			return;
		}
		break;
	case LCK_RW_ASSERT_EXCLUSIVE:
		/* Exclusive/upgrade bit set, no readers, and we are the owner. */
		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_shared_count == 0) &&
		    (lck->lck_rw_owner == thread->ctid)) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */
			return;
		}
		break;
	case LCK_RW_ASSERT_HELD:
		/* Accept either shared (by anyone) or exclusive (by us). */
		if (lck->lck_rw_shared_count != 0) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */
			return; // Held shared
		}
		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_owner == thread->ctid)) {
#if DEBUG_RW
			assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */
			return; // Held exclusive
		}
		break;
	case LCK_RW_ASSERT_NOTHELD:
		/* No readers, no writer/upgrader, no recorded owner. */
		if ((lck->lck_rw_shared_count == 0) &&
		    !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_owner == 0)) {
#ifdef DEBUG_RW
			assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */
			return;
		}
		break;
	default:
		break;
	}
	panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
}
2441
2442 /*!
2443 * @function kdp_lck_rw_lock_is_acquired_exclusive
2444 *
2445 * @abstract
2446 * Checks if a rw_lock is held exclusevely.
2447 *
2448 * @discussion
2449 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2450 *
2451 * @param lck lock to check
2452 *
2453 * @returns TRUE if the lock is held exclusevely
2454 */
2455 boolean_t
kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t * lck)2456 kdp_lck_rw_lock_is_acquired_exclusive(
2457 lck_rw_t *lck)
2458 {
2459 if (not_in_kdp) {
2460 panic("panic: rw lock exclusive check done outside of kernel debugger");
2461 }
2462 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2463 }
2464
2465 void
kdp_rwlck_find_owner(__unused struct waitq * waitq,event64_t event,thread_waitinfo_t * waitinfo)2466 kdp_rwlck_find_owner(
2467 __unused struct waitq *waitq,
2468 event64_t event,
2469 thread_waitinfo_t *waitinfo)
2470 {
2471 lck_rw_t *rwlck = NULL;
2472 switch (waitinfo->wait_type) {
2473 case kThreadWaitKernelRWLockRead:
2474 rwlck = READ_EVENT_TO_RWLOCK(event);
2475 break;
2476 case kThreadWaitKernelRWLockWrite:
2477 case kThreadWaitKernelRWLockUpgrade:
2478 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2479 break;
2480 default:
2481 panic("%s was called with an invalid blocking type", __FUNCTION__);
2482 break;
2483 }
2484 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2485 waitinfo->owner = thread_tid(ctid_get_thread(rwlck->lck_rw_owner));
2486 }
2487
2488 /*!
2489 * @function lck_rw_lock_yield_shared
2490 *
2491 * @abstract
2492 * Yields a rw_lock held in shared mode.
2493 *
2494 * @discussion
2495 * This function can block.
2496 * Yields the lock in case there are writers waiting.
2497 * The yield will unlock, block, and re-lock the lock in shared mode.
2498 *
2499 * @param lck rw_lock already held in shared mode to yield.
2500 * @param force_yield if set to true it will always yield irrespective of the lock status
2501 *
2502 * @returns TRUE if the lock was yield, FALSE otherwise
2503 */
2504 bool
lck_rw_lock_yield_shared(lck_rw_t * lck,boolean_t force_yield)2505 lck_rw_lock_yield_shared(
2506 lck_rw_t *lck,
2507 boolean_t force_yield)
2508 {
2509 lck_rw_word_t word;
2510
2511 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2512
2513 word.data = ordered_load_rw(lck);
2514 if (word.want_excl || word.want_upgrade || force_yield) {
2515 lck_rw_unlock_shared(lck);
2516 mutex_pause(2);
2517 lck_rw_lock_shared(lck);
2518 return true;
2519 }
2520
2521 return false;
2522 }
2523
2524 /*!
2525 * @function lck_rw_lock_yield_exclusive
2526 *
2527 * @abstract
2528 * Yields a rw_lock held in exclusive mode.
2529 *
2530 * @discussion
2531 * This function can block.
2532 * Yields the lock in case there are writers waiting.
2533 * The yield will unlock, block, and re-lock the lock in exclusive mode.
2534 *
2535 * @param lck rw_lock already held in exclusive mode to yield.
2536 * @param mode when to yield.
2537 *
2538 * @returns TRUE if the lock was yield, FALSE otherwise
2539 */
2540 bool
lck_rw_lock_yield_exclusive(lck_rw_t * lck,lck_rw_yield_t mode)2541 lck_rw_lock_yield_exclusive(
2542 lck_rw_t *lck,
2543 lck_rw_yield_t mode)
2544 {
2545 lck_rw_word_t word;
2546 bool yield = false;
2547
2548 lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);
2549
2550 if (mode == LCK_RW_YIELD_ALWAYS) {
2551 yield = true;
2552 } else {
2553 word.data = ordered_load_rw(lck);
2554 if (word.w_waiting) {
2555 yield = true;
2556 } else if (mode == LCK_RW_YIELD_ANY_WAITER) {
2557 yield = (word.r_waiting != 0);
2558 }
2559 }
2560
2561 if (yield) {
2562 lck_rw_unlock_exclusive(lck);
2563 mutex_pause(2);
2564 lck_rw_lock_exclusive(lck);
2565 }
2566
2567 return yield;
2568 }
2569
/*!
 * @function lck_rw_sleep
 *
 * @abstract
 * Assert_wait on an event while holding the rw_lock.
 *
 * @discussion
 * the flags can decide how to re-acquire the lock upon wake up
 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
 * and if the priority needs to be kept boosted until the lock is
 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
 *
 * @param lck rw_lock to use to synch the assert_wait.
 * @param lck_sleep_action flags.
 * @param event event to assert_wait on.
 * @param interruptible wait type.
 */
wait_result_t
lck_rw_sleep(
	lck_rw_t                *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible)
{
	wait_result_t           res;
	lck_rw_type_t           lck_rw_type;
	/* Only initialized/used when LCK_SLEEP_PROMOTED_PRI is set. */
	thread_pri_floor_t      token;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * Although we are dropping the RW lock, the intent in most cases
		 * is that this thread remains as an observer, since it may hold
		 * some secondary resource, but must yield to avoid deadlock. In
		 * this situation, make sure that the thread is boosted to the
		 * ceiling while blocked, so that it can re-acquire the
		 * RW lock at that priority.
		 */
		token = thread_priority_floor_start();
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		/* Drop the lock (remembering the mode held) and block. */
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* Re-acquire: same mode as before by default, or as requested. */
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
				lck_rw_lock(lck, lck_rw_type);
			} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
				lck_rw_lock_exclusive(lck);
			} else {
				lck_rw_lock_shared(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Wait never started, but the caller asked us to drop the lock. */
		(void)lck_rw_done(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	return res;
}
2637
/*!
 * @function lck_rw_sleep_deadline
 *
 * @abstract
 * Assert_wait_deadline on an event while holding the rw_lock.
 *
 * @discussion
 * the flags can decide how to re-acquire the lock upon wake up
 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
 * and if the priority needs to be kept boosted until the lock is
 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
 *
 * @param lck rw_lock to use to synch the assert_wait.
 * @param lck_sleep_action flags.
 * @param event event to assert_wait on.
 * @param interruptible wait type.
 * @param deadline maximum time after which being woken up
 */
wait_result_t
lck_rw_sleep_deadline(
	lck_rw_t                *lck,
	lck_sleep_action_t      lck_sleep_action,
	event_t                 event,
	wait_interrupt_t        interruptible,
	uint64_t                deadline)
{
	wait_result_t           res;
	lck_rw_type_t           lck_rw_type;
	/* Only initialized/used when LCK_SLEEP_PROMOTED_PRI is set. */
	thread_pri_floor_t      token;

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
		panic("Invalid lock sleep action %x", lck_sleep_action);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/* Keep the thread boosted while blocked; see lck_rw_sleep(). */
		token = thread_priority_floor_start();
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		/* Drop the lock (remembering the mode held) and block. */
		lck_rw_type = lck_rw_done(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			/* Re-acquire: same mode as before by default, or as requested. */
			if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
				lck_rw_lock(lck, lck_rw_type);
			} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
				lck_rw_lock_exclusive(lck);
			} else {
				lck_rw_lock_shared(lck);
			}
		}
	} else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		/* Wait never started, but the caller asked us to drop the lock. */
		(void)lck_rw_done(lck);
	}

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		thread_priority_floor_end(&token);
	}

	return res;
}
2699
2700 /*
2701 * Reader-writer lock promotion
2702 *
2703 * We support a limited form of reader-writer
2704 * lock promotion whose effects are:
2705 *
2706 * * Qualifying threads have decay disabled
 * * Scheduler priority is reset to a floor of
 *   their statically assigned priority
 *   or MINPRI_RWLOCK
2710 *
2711 * The rationale is that lck_rw_ts do not have
2712 * a single owner, so we cannot apply a directed
2713 * priority boost from all waiting threads
2714 * to all holding threads without maintaining
2715 * lists of all shared owners and all waiting
2716 * threads for every lock.
2717 *
2718 * Instead (and to preserve the uncontended fast-
2719 * path), acquiring (or attempting to acquire)
2720 * a RW lock in shared or exclusive lock increments
2721 * a per-thread counter. Only if that thread stops
2722 * making forward progress (for instance blocking
2723 * on a mutex, or being preempted) do we consult
2724 * the counter and apply the priority floor.
2725 * When the thread becomes runnable again (or in
2726 * the case of preemption it never stopped being
2727 * runnable), it has the priority boost and should
2728 * be in a good position to run on the CPU and
2729 * release all RW locks (at which point the priority
2730 * boost is cleared).
2731 *
2732 * Care must be taken to ensure that priority
2733 * boosts are not retained indefinitely, since unlike
2734 * mutex priority boosts (where the boost is tied
2735 * to the mutex lifecycle), the boost is tied
2736 * to the thread and independent of any particular
2737 * lck_rw_t. Assertions are in place on return
2738 * to userspace so that the boost is not held
2739 * indefinitely.
2740 *
2741 * The routines that increment/decrement the
2742 * per-thread counter should err on the side of
2743 * incrementing any time a preemption is possible
2744 * and the lock would be visible to the rest of the
2745 * system as held (so it should be incremented before
2746 * interlocks are dropped/preemption is enabled, or
2747 * before a CAS is executed to acquire the lock).
2748 *
2749 */
2750
/*!
 * @function lck_rw_clear_promotion
 *
 * @abstract
 * Undo priority promotions when the last rw_lock
 * is released by a thread (if a promotion was active).
 *
 * @discussion
 * Takes the thread lock at splsched; callers must have already dropped
 * their last RW lock (rwlock_count == 0).
 *
 * @param thread thread to demote.
 * @param trace_obj object reason for the demotion.
 */
void
lck_rw_clear_promotion(
	thread_t        thread,
	uintptr_t       trace_obj)
{
	assert(thread->rwlock_count == 0);

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();
	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
	}

	thread_unlock(thread);
	splx(s);
}
2779
2780 /*!
2781 * @function lck_rw_set_promotion_locked
2782 *
2783 * @abstract
2784 * Callout from context switch if the thread goes
2785 * off core with a positive rwlock_count.
2786 *
2787 * @discussion
2788 * Called at splsched with the thread locked.
2789 *
2790 * @param thread thread to promote.
2791 */
2792 void
lck_rw_set_promotion_locked(thread_t thread)2793 lck_rw_set_promotion_locked(thread_t thread)
2794 {
2795 if (LcksOpts & disLkRWPrio) {
2796 return;
2797 }
2798
2799 assert(thread->rwlock_count > 0);
2800
2801 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2802 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
2803 }
2804 }
2805