1 /*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 #define LOCK_PRIVATE 1
57 #include <debug.h>
58 #include <kern/locks_internal.h>
59 #include <kern/lock_stat.h>
60 #include <kern/locks.h>
61 #include <kern/zalloc.h>
62 #include <kern/thread.h>
63 #include <kern/processor.h>
64 #include <kern/sched_prim.h>
65 #include <kern/debug.h>
66 #include <machine/atomic.h>
67 #include <machine/machine_cpu.h>
68
69 KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);
70
71 #define LCK_RW_WRITER_EVENT(lck) (event_t)((uintptr_t)(lck)+1)
72 #define LCK_RW_READER_EVENT(lck) (event_t)((uintptr_t)(lck)+2)
73 #define WRITE_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-1))
74 #define READ_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-2))
75
76 #if CONFIG_DTRACE
77 #define DTRACE_RW_SHARED 0x0 //reader
78 #define DTRACE_RW_EXCL 0x1 //writer
79 #define DTRACE_NO_FLAG 0x0 //not applicable
80 #endif /* CONFIG_DTRACE */
81
82 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
83 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
84 #define LCK_RW_LCK_SHARED_CODE 0x102
85 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
86 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
87 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
88
89 #if __x86_64__
90 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
91 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
92 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
93 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
94 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
95 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
96 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
97 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
98 #endif
99
100 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
101 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
102
103 #define ordered_load_rw(lock) os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
104 #define ordered_store_rw(lock, value) os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
105 #define ordered_store_rw_owner(lock, value) os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)
106
107 #ifdef DEBUG_RW
108
109 STATIC_IF_KEY_DEFINE_TRUE(lck_rw_assert);
110
111 static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
112 SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
113 VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
114
115 #define set_rwlde_caller_packed(entry, caller) ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)caller, LCK_RW_CALLER_PACKED))
116 #define get_rwlde_caller(entry) ((void*)VM_UNPACK_POINTER(entry->rwlde_caller_packed, LCK_RW_CALLER_PACKED))
117
118 #endif /* DEBUG_RW */
119
120 /*!
121 * @function lck_rw_alloc_init
122 *
123 * @abstract
124 * Allocates and initializes a rw_lock_t.
125 *
126 * @discussion
127 * The function can block. See lck_rw_init() for initialization details.
128 *
129 * @param grp lock group to associate with the lock.
130 * @param attr lock attribute to initialize the lock.
131 *
132 * @returns NULL or the allocated lock
133 */
134 lck_rw_t *
135 lck_rw_alloc_init(
136 lck_grp_t *grp,
137 lck_attr_t *attr)
138 {
139 lck_rw_t *lck;
140
141 lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
142 lck_rw_init(lck, grp, attr);
143 return lck;
144 }
145
146 /*!
147 * @function lck_rw_init
148 *
149 * @abstract
150 * Initializes a rw_lock_t.
151 *
152 * @discussion
153 * Usage statistics for the lock are going to be added to the lock group provided.
154 *
155 * The lock attribute can be used to specify the lock contention behaviour.
156 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to RW_WRITER_PRIORITY)
157 * and lck_attr_rw_shared_priority() can be used to set the behaviour to RW_SHARED_PRIORITY.
158 *
159 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the readers;
160 * if the lock is held and a writer starts waiting for the lock, readers will not be able
161 * to acquire the lock until all writers stop contending. Readers could
162 * potentially starve.
163 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the writers:
164 * unless the lock is held in exclusive mode, readers will always be able to acquire the lock.
165 * Readers can lock a shared lock even if there are writers waiting. Writers could potentially
166 * starve.
167 *
168 * @param lck lock to initialize.
169 * @param grp lock group to associate with the lock.
170 * @param attr lock attribute to initialize the lock.
171 *
172 */
173 void
174 lck_rw_init(
175 lck_rw_t *lck,
176 lck_grp_t *grp,
177 lck_attr_t *attr)
178 {
179 /* keep this so that the lck_type_t type is referenced for lldb */
180 lck_type_t type = LCK_TYPE_RW;
181
182 if (attr == LCK_ATTR_NULL) {
183 attr = &lck_attr_default;
184 }
185 *lck = (lck_rw_t){
186 .lck_rw_type = type,
187 .lck_rw_can_sleep = true,
188 .lck_rw_priv_excl = !(attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY),
189 };
190 lck_grp_reference(grp, &grp->lck_grp_rwcnt);
191 }
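
/*
 * Illustrative usage sketch for lck_rw_init() (not part of this file's
 * implementation; the subsystem, group and variable names below are made up,
 * while the lck_grp_*() / lck_attr_*() helpers are the public lock APIs):
 *
 *      static lck_grp_t *my_grp;
 *      static lck_rw_t my_lock;        // default: RW_WRITER_PRIORITY
 *      static lck_rw_t my_rd_lock;     // explicit: RW_SHARED_PRIORITY
 *
 *      void
 *      my_subsystem_init(void)
 *      {
 *              lck_attr_t *attr;
 *
 *              my_grp = lck_grp_alloc_init("my-subsystem", LCK_GRP_ATTR_NULL);
 *              lck_rw_init(&my_lock, my_grp, LCK_ATTR_NULL);
 *
 *              attr = lck_attr_alloc_init();
 *              lck_attr_rw_shared_priority(attr);
 *              lck_rw_init(&my_rd_lock, my_grp, attr);
 *              lck_attr_free(attr);
 *      }
 */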
192
193 /*!
194 * @function lck_rw_free
195 *
196 * @abstract
197 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
198 *
199 * @discussion
200 * The lock must not be held by any thread.
201 *
202 * @param lck rw_lock to free.
203 */
204 void
205 lck_rw_free(
206 lck_rw_t *lck,
207 lck_grp_t *grp)
208 {
209 lck_rw_destroy(lck, grp);
210 zfree(KT_LCK_RW, lck);
211 }
212
213 /*!
214 * @function lck_rw_destroy
215 *
216 * @abstract
217 * Destroys a rw_lock previously initialized with lck_rw_init().
218 *
219 * @discussion
220 * The lock must not be held by any thread.
221 *
222 * @param lck rw_lock to destroy.
223 */
224 void
225 lck_rw_destroy(
226 lck_rw_t *lck,
227 lck_grp_t *grp)
228 {
229 if (lck->lck_rw_type != LCK_TYPE_RW ||
230 lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
231 panic("Destroying previously destroyed lock %p", lck);
232 }
233 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
234
235 lck->lck_rw_type = LCK_TYPE_NONE;
236 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
237 lck_grp_deallocate(grp, &grp->lck_grp_rwcnt);
238 }
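
/*
 * Illustrative lifecycle sketch for a heap-allocated lock (a hedged example,
 * not part of this file; "my_grp" and the protected state are made up):
 *
 *      lck_rw_t *rwl = lck_rw_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *      lck_rw_lock_exclusive(rwl);
 *      // ... mutate the protected state ...
 *      lck_rw_unlock_exclusive(rwl);
 *
 *      // The lock must not be held by any thread when it is freed;
 *      // lck_rw_free() calls lck_rw_destroy() and returns the memory.
 *      lck_rw_free(rwl, my_grp);
 */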
239
240 #ifdef DEBUG_RW
241
242 /*
243 * Best effort mechanism to debug rw_locks.
244 *
245 * This mechanism is in addition to the owner checks. The owner is set
246 * only when the lock is held in exclusive mode so the checks do not cover
247 * the cases in which the lock is held in shared mode.
248 *
249 * This mechanism tentatively stores the rw_lock acquired and its debug
250 * information on the thread struct.
251 * Debug information can be stored for at most LCK_RW_EXPECTED_MAX_NUMBER rw_locks.
252 *
253 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
254 * at the same time. If a thread holds more than this number of rw_locks we
255 * will start losing debug information.
256 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability we will
257 * store the debug information but it will require more memory per thread
258 * and longer lock/unlock time.
259 *
260 * If an empty slot is found for the debug information, we record the lock;
261 * otherwise we set the overflow flag.
262 *
263 * Once the overflow flag is set we might stop asserting, because we can no
264 * longer be sure whether the lock was acquired or not.
265 *
266 * Even if we reached the overflow threshold, we try to store the debug information
267 * for the new locks acquired. This can be useful in core dumps to debug
268 * possible return to userspace without unlocking and to find possible readers
269 * holding the lock.
270 */
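/*
 * For reference, the per-thread bookkeeping used below (thread->rw_lock_held,
 * an rw_lock_debug_t) is used as follows in this file; this is a paraphrase
 * inferred from the code, the authoritative definitions live in the lock and
 * thread headers:
 *
 *   - rwld_locks[LCK_RW_EXPECTED_MAX_NUMBER]: one rw_lock_debug_entry per
 *     tracked lock; rwlde_lock is the lock pointer, rwlde_mode_count is -1
 *     when held exclusive or a positive count of shared holds, and
 *     rwlde_caller_packed stores the packed caller address.
 *   - rwld_locks_acquired: number of rw locks currently held by the thread.
 *   - rwld_locks_saved: number of slots currently in use.
 *   - rwld_overflow: set once a held lock could not be recorded.
 */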
271 #if DEBUG_RW
272
273 __static_if_init_func
274 void
275 lck_rw_assert_init(const char *args, uint64_t kf_ovrd)
276 {
277 bool lck_rw_assert_disable = false;
278
279 if (kf_ovrd & KF_MACH_ASSERT_OVRD) {
280 lck_rw_assert_disable = true;
281 }
282
283 if (static_if_boot_arg_uint64(args, "lcks", 0) &
284 LCK_OPTION_DISABLE_RW_DEBUG) {
285 lck_rw_assert_disable = true;
286 }
287
288 if (lck_rw_assert_disable) {
289 static_if_key_disable(lck_rw_assert);
290 }
291 }
292
293 #endif /* DEBUG_RW */
294
295 static inline struct rw_lock_debug_entry *
296 find_lock_in_savedlocks(lck_rw_t* lock, rw_lock_debug_t *rw_locks_held)
297 {
298 int i;
299 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
300 struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
301 if (existing->rwlde_lock == lock) {
302 return existing;
303 }
304 }
305
306 return NULL;
307 }
308
309 __abortlike
310 static void
311 rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
312 {
313 panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
314 }
315
316 static inline struct rw_lock_debug_entry *
317 find_empty_slot(rw_lock_debug_t *rw_locks_held)
318 {
319 int i;
320 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
321 struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
322 if (entry->rwlde_lock == NULL) {
323 return entry;
324 }
325 }
326 rwlock_slot_panic(rw_locks_held);
327 }
328
329 __abortlike
330 static void
331 canlock_rwlock_panic(lck_rw_t* lock, thread_t thread, struct rw_lock_debug_entry *entry)
332 {
333 panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
334 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
335 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
336 }
337
338 __attribute__((noinline))
339 static void
340 assert_canlock_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
341 {
342 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
343 if (__probable(rw_locks_held->rwld_locks_acquired == 0)) {
344 //no locks saved, safe to lock
345 return;
346 }
347
348 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
349 if (__improbable(entry != NULL)) {
350 boolean_t can_be_shared_recursive;
351 if (lck_rw_recursive_shared_assert_74048094) {
352 can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
353 } else {
354 /* currently rw_lock_shared is called recursively;
355 * until the code is fixed, allow recursive
356 * locking in shared mode
357 */
358 can_be_shared_recursive = TRUE;
359 }
360 if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
361 return;
362 }
363 canlock_rwlock_panic(lock, thread, entry);
364 }
365 }
366
367 static inline void
368 assert_canlock_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
369 {
370 if (lck_rw_assert_enabled()) {
371 assert_canlock_rwlock_slow(lock, thread, type);
372 }
373 }
374
375 __abortlike
376 static void
377 held_rwlock_notheld_panic(lck_rw_t* lock, thread_t thread)
378 {
379 panic("RW lock %p not held by %p", lock, thread);
380 }
381
382 __abortlike
383 static void
384 held_rwlock_notheld_with_info_panic(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
385 {
386 if (type == LCK_RW_TYPE_EXCLUSIVE) {
387 panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
388 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
389 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
390 } else {
391 panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
392 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
393 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
394 }
395 }
396
397 __attribute__((noinline))
398 static void
399 assert_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
400 {
401 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
402
403 if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
404 if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
405 held_rwlock_notheld_panic(lock, thread);
406 }
407 return;
408 }
409
410 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
411 if (__probable(entry != NULL)) {
412 if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
413 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
414 } else {
415 if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
416 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
417 }
418 }
419 } else {
420 if (rw_locks_held->rwld_overflow == 0) {
421 held_rwlock_notheld_panic(lock, thread);
422 }
423 }
424 }
425
426 static inline void
427 assert_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
428 {
429 if (lck_rw_assert_enabled()) {
430 assert_held_rwlock_slow(lock, thread, type);
431 }
432 }
433
434 __attribute__((noinline))
435 static void
436 change_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
437 {
438 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
439 if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
440 if (rw_locks_held->rwld_overflow == 0) {
441 held_rwlock_notheld_panic(lock, thread);
442 }
443 return;
444 }
445
446 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
447 if (__probable(entry != NULL)) {
448 if (typeFrom == LCK_RW_TYPE_SHARED) {
449 //We are upgrading
450 assertf(entry->rwlde_mode_count == 1,
451 "RW lock %p not held by a single shared when upgrading "
452 "by %p caller %p read %d state 0x%x owner 0x%p ",
453 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
454 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
455 entry->rwlde_mode_count = -1;
456 set_rwlde_caller_packed(entry, caller);
457 } else {
458 //We are downgrading
459 assertf(entry->rwlde_mode_count == -1,
460 "RW lock %p not held in write mode when downgrading "
461 "by %p caller %p read %d state 0x%x owner 0x%p ",
462 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
463 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
464 entry->rwlde_mode_count = 1;
465 set_rwlde_caller_packed(entry, caller);
466 }
467 return;
468 }
469
470 if (rw_locks_held->rwld_overflow == 0) {
471 held_rwlock_notheld_panic(lock, thread);
472 }
473
474 if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
475 //array is full
476 return;
477 }
478
479 struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
480 null_entry->rwlde_lock = lock;
481 set_rwlde_caller_packed(null_entry, caller);
482 if (typeFrom == LCK_RW_TYPE_SHARED) {
483 null_entry->rwlde_mode_count = -1;
484 } else {
485 null_entry->rwlde_mode_count = 1;
486 }
487 rw_locks_held->rwld_locks_saved++;
488 }
489
490 static inline void
491 change_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
492 {
493 if (lck_rw_assert_enabled()) {
494 change_held_rwlock_slow(lock, thread, typeFrom, caller);
495 }
496 }
497
498 __abortlike
499 static void
500 add_held_rwlock_too_many_panic(thread_t thread)
501 {
502 panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
503 }
504
505 static __attribute__((noinline)) void
506 add_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
507 {
508 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
509 struct rw_lock_debug_entry *null_entry;
510 if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
511 add_held_rwlock_too_many_panic(thread);
512 }
513 rw_locks_held->rwld_locks_acquired++;
514
515 if (type == LCK_RW_TYPE_EXCLUSIVE) {
516 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
517 //array is full
518 rw_locks_held->rwld_overflow = 1;
519 return;
520 }
521 null_entry = find_empty_slot(rw_locks_held);
522 null_entry->rwlde_lock = lock;
523 set_rwlde_caller_packed(null_entry, caller);
524 null_entry->rwlde_mode_count = -1;
525 rw_locks_held->rwld_locks_saved++;
526 return;
527 } else {
528 if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
529 //array is empty
530 goto add_shared;
531 }
532
533 boolean_t allow_shared_recursive;
534 if (lck_rw_recursive_shared_assert_74048094) {
535 allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
536 } else {
537 allow_shared_recursive = TRUE;
538 }
539 if (allow_shared_recursive) {
540 //It could already be locked in shared mode
541 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
542 if (entry != NULL) {
543 assert(entry->rwlde_mode_count > 0);
544 assertf(entry->rwlde_mode_count != INT8_MAX,
545 "RW lock %p with too many recursive shared held "
546 "from %p caller %p read %d state 0x%x owner 0x%p",
547 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
548 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
549 entry->rwlde_mode_count += 1;
550 return;
551 }
552 }
553
554 //none of the locks were a match
555 //try to add a new entry
556 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
557 //array is full
558 rw_locks_held->rwld_overflow = 1;
559 return;
560 }
561
562 add_shared:
563 null_entry = find_empty_slot(rw_locks_held);
564 null_entry->rwlde_lock = lock;
565 set_rwlde_caller_packed(null_entry, caller);
566 null_entry->rwlde_mode_count = 1;
567 rw_locks_held->rwld_locks_saved++;
568 }
569 }
570
571 static inline void
572 add_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
573 {
574 if (lck_rw_assert_enabled()) {
575 add_held_rwlock_slow(lock, thread, type, caller);
576 }
577 }
578
579 static void
580 remove_held_rwlock_slow(lck_rw_t *lock, thread_t thread, lck_rw_type_t type)
581 {
582 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
583 if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
584 return;
585 }
586 rw_locks_held->rwld_locks_acquired--;
587
588 if (rw_locks_held->rwld_locks_saved == 0) {
589 assert(rw_locks_held->rwld_overflow == 1);
590 goto out;
591 }
592
593 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
594 if (__probable(entry != NULL)) {
595 if (type == LCK_RW_TYPE_EXCLUSIVE) {
596 assert(entry->rwlde_mode_count == -1);
597 entry->rwlde_mode_count = 0;
598 } else {
599 assert(entry->rwlde_mode_count > 0);
600 entry->rwlde_mode_count--;
601 if (entry->rwlde_mode_count > 0) {
602 goto out;
603 }
604 }
605 entry->rwlde_caller_packed = 0;
606 entry->rwlde_lock = NULL;
607 rw_locks_held->rwld_locks_saved--;
608 } else {
609 assert(rw_locks_held->rwld_overflow == 1);
610 }
611
612 out:
613 if (rw_locks_held->rwld_locks_acquired == 0) {
614 rw_locks_held->rwld_overflow = 0;
615 }
616 return;
617 }
618
619 static inline void
620 remove_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
621 {
622 if (lck_rw_assert_enabled()) {
623 remove_held_rwlock_slow(lock, thread, type);
624 }
625 }
626 #endif /* DEBUG_RW */
627
628 /*
629 * We disable interrupts while holding the RW interlock to prevent an
630 * interrupt from exacerbating hold time.
631 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
632 */
633 static inline boolean_t
634 lck_interlock_lock(
635 lck_rw_t *lck)
636 {
637 boolean_t istate;
638
639 istate = ml_set_interrupts_enabled(FALSE);
640 lck_rw_ilk_lock(lck);
641 return istate;
642 }
643
644 static inline void
645 lck_interlock_unlock(
646 lck_rw_t *lck,
647 boolean_t istate)
648 {
649 lck_rw_ilk_unlock(lck);
650 ml_set_interrupts_enabled(istate);
651 }
652
653 /*
654 * compute the deadline to spin against when
655 * waiting for a change of state on a lck_rw_t
656 */
657 static inline uint64_t
658 lck_rw_deadline_for_spin(
659 lck_rw_t *lck)
660 {
661 lck_rw_word_t word;
662
663 word.data = ordered_load_rw(lck);
664 if (word.can_sleep) {
665 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
666 /*
667 * there are already threads waiting on this lock... this
668 * implies that they have spun beyond their deadlines waiting for
669 * the desired state to show up so we will not bother spinning at this time...
670 * or
671 * the current number of threads sharing this lock exceeds our capacity to run them
672 * concurrently and since all states we're going to spin for require the rw_shared_count
673 * to be at 0, we'll not bother spinning since the latency for this to happen is
674 * unpredictable...
675 */
676 return mach_absolute_time();
677 }
678 return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
679 } else {
680 return mach_absolute_time() + (100000LL * 1000000000LL);
681 }
682 }
683
684 /*
685 * This inline is used when busy-waiting for an rw lock.
686 * If interrupts were disabled when the lock primitive was called,
687 * we poll the IPI handler for pending TLB flushes on x86.
688 */
689 static inline void
690 lck_rw_lock_pause(
691 boolean_t interrupts_enabled)
692 {
693 #if X86_64
694 if (!interrupts_enabled) {
695 handle_pending_TLB_flushes();
696 }
697 cpu_pause();
698 #else
699 (void) interrupts_enabled;
700 wait_for_event();
701 #endif
702 }
703
704 typedef enum __enum_closed {
705 LCK_RW_DRAIN_S_DRAINED = 0,
706 LCK_RW_DRAIN_S_NOT_DRAINED = 1,
707 LCK_RW_DRAIN_S_EARLY_RETURN = 2,
708 LCK_RW_DRAIN_S_TIMED_OUT = 3,
709 } lck_rw_drain_state_t;
710
711 static lck_rw_drain_state_t
712 lck_rw_drain_status(
713 lck_rw_t *lock,
714 uint32_t status_mask,
715 boolean_t wait,
716 bool (^lock_pause)(void))
717 {
718 uint64_t deadline = 0;
719 uint32_t data;
720 boolean_t istate = FALSE;
721
722 if (wait) {
723 deadline = lck_rw_deadline_for_spin(lock);
724 #if __x86_64__
725 istate = ml_get_interrupts_enabled();
726 #endif
727 }
728
729 for (;;) {
730 #if __x86_64__
731 data = os_atomic_load(&lock->lck_rw_data, relaxed);
732 #else
733 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
734 #endif
735 if ((data & status_mask) == 0) {
736 atomic_exchange_abort();
737 return LCK_RW_DRAIN_S_DRAINED;
738 }
739
740 if (!wait) {
741 atomic_exchange_abort();
742 return LCK_RW_DRAIN_S_NOT_DRAINED;
743 }
744
745 lck_rw_lock_pause(istate);
746
747 if (mach_absolute_time() >= deadline) {
748 return LCK_RW_DRAIN_S_TIMED_OUT;
749 }
750
751 if (lock_pause && lock_pause()) {
752 return LCK_RW_DRAIN_S_EARLY_RETURN;
753 }
754 }
755 }
756
757 /*
758 * Spin while interlock is held.
759 */
760 static inline void
761 lck_rw_interlock_spin(
762 lck_rw_t *lock)
763 {
764 uint32_t data, prev;
765
766 for (;;) {
767 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
768 if (data & LCK_RW_INTERLOCK) {
769 #if __x86_64__
770 cpu_pause();
771 #else
772 wait_for_event();
773 #endif
774 } else {
775 atomic_exchange_abort();
776 return;
777 }
778 }
779 }
780
781 #define LCK_RW_GRAB_WANT 0
782 #define LCK_RW_GRAB_SHARED 1
783
784 typedef enum __enum_closed __enum_options {
785 LCK_RW_GRAB_F_SHARED = 0x0, // Not really a flag obviously but makes call sites more readable.
786 LCK_RW_GRAB_F_WANT_EXCL = 0x1,
787 LCK_RW_GRAB_F_WAIT = 0x2,
788 } lck_rw_grab_flags_t;
789
790 typedef enum __enum_closed {
791 LCK_RW_GRAB_S_NOT_LOCKED = 0,
792 LCK_RW_GRAB_S_LOCKED = 1,
793 LCK_RW_GRAB_S_EARLY_RETURN = 2,
794 LCK_RW_GRAB_S_TIMED_OUT = 3,
795 } lck_rw_grab_state_t;
796
797 static lck_rw_grab_state_t
798 lck_rw_grab(
799 lck_rw_t *lock,
800 lck_rw_grab_flags_t flags,
801 bool (^lock_pause)(void))
802 {
803 uint64_t deadline = 0;
804 uint32_t data, prev;
805 boolean_t do_exch, istate = FALSE;
806
807 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
808
809 if ((flags & LCK_RW_GRAB_F_WAIT) != 0) {
810 deadline = lck_rw_deadline_for_spin(lock);
811 #if __x86_64__
812 istate = ml_get_interrupts_enabled();
813 #endif
814 }
815
816 for (;;) {
817 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
818 if (data & LCK_RW_INTERLOCK) {
819 atomic_exchange_abort();
820 lck_rw_interlock_spin(lock);
821 continue;
822 }
823 do_exch = FALSE;
824 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
825 if ((data & LCK_RW_WANT_EXCL) == 0) {
826 data |= LCK_RW_WANT_EXCL;
827 do_exch = TRUE;
828 }
829 } else { // LCK_RW_GRAB_SHARED
830 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
831 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
832 data += LCK_RW_SHARED_READER;
833 do_exch = TRUE;
834 }
835 }
836 if (do_exch) {
837 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
838 return LCK_RW_GRAB_S_LOCKED;
839 }
840 } else {
841 if ((flags & LCK_RW_GRAB_F_WAIT) == 0) {
842 atomic_exchange_abort();
843 return LCK_RW_GRAB_S_NOT_LOCKED;
844 }
845
846 lck_rw_lock_pause(istate);
847
848 if (mach_absolute_time() >= deadline) {
849 return LCK_RW_GRAB_S_TIMED_OUT;
850 }
851 if (lock_pause && lock_pause()) {
852 return LCK_RW_GRAB_S_EARLY_RETURN;
853 }
854 }
855 }
856 }
857
858 /*
859 * The inverse of lck_rw_grab - drops either the LCK_RW_WANT_EXCL bit or
860 * decrements the reader count. Doesn't deal with waking up waiters - i.e.
861 * should only be called when can_sleep is false.
862 */
863 static void
864 lck_rw_drop(lck_rw_t *lock, lck_rw_grab_flags_t flags)
865 {
866 uint32_t data, prev;
867
868 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
869 assert(!lock->lck_rw_can_sleep);
870
871 for (;;) {
872 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
873
874 /* Interlock should never be taken when can_sleep is false. */
875 assert3u(data & LCK_RW_INTERLOCK, ==, 0);
876
877 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
878 data &= ~LCK_RW_WANT_EXCL;
879 } else {
880 data -= LCK_RW_SHARED_READER;
881 }
882
883 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
884 break;
885 }
886
887 cpu_pause();
888 }
889
890 return;
891 }
892
893 static boolean_t
894 lck_rw_lock_exclusive_gen(
895 lck_rw_t *lock,
896 bool (^lock_pause)(void))
897 {
898 __assert_only thread_t self = current_thread();
899 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
900 lck_rw_word_t word;
901 int slept = 0;
902 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
903 lck_rw_drain_state_t drain_state = LCK_RW_DRAIN_S_NOT_DRAINED;
904 wait_result_t res = 0;
905 boolean_t istate;
906
907 #if CONFIG_DTRACE
908 boolean_t dtrace_ls_initialized = FALSE;
909 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
910 uint64_t wait_interval = 0;
911 int readers_at_sleep = 0;
912 #endif
913
914 assertf(lock->lck_rw_owner != self->ctid,
915 "Lock already held state=0x%x, owner=%p",
916 ordered_load_rw(lock), self);
917
918 #ifdef DEBUG_RW
919 /*
920 * Best effort attempt to check that this thread
921 * is not already holding the lock (this checks read mode too).
922 */
923 assert_canlock_rwlock(lock, self, LCK_RW_TYPE_EXCLUSIVE);
924 #endif /* DEBUG_RW */
925
926 /*
927 * Try to acquire the lck_rw_want_excl bit.
928 */
929 while (lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL, NULL) != LCK_RW_GRAB_S_LOCKED) {
930 #if CONFIG_DTRACE
931 if (dtrace_ls_initialized == FALSE) {
932 dtrace_ls_initialized = TRUE;
933 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
934 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
935 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
936 if (dtrace_ls_enabled) {
937 /*
938 * Either sleeping or spinning is happening,
939 * start a timing of our delay interval now.
940 */
941 readers_at_sleep = lock->lck_rw_shared_count;
942 wait_interval = mach_absolute_time();
943 }
944 }
945 #endif
946
947 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START,
948 trace_lck, 0, 0, 0, 0);
949
950 grab_state = lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT, lock_pause);
951
952 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END,
953 trace_lck, 0, 0, grab_state, 0);
954
955 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
956 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
957 break;
958 }
959 /*
960 * if we get here, the deadline has expired w/o us
961 * being able to grab the lock exclusively
962 * check to see if we're allowed to do a thread_block
963 */
964 word.data = ordered_load_rw(lock);
965 if (word.can_sleep) {
966 istate = lck_interlock_lock(lock);
967 word.data = ordered_load_rw(lock);
968
969 if (word.want_excl) {
970 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
971
972 word.w_waiting = 1;
973 ordered_store_rw(lock, word.data);
974
975 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
976 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
977 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
978 lck_interlock_unlock(lock, istate);
979 if (res == THREAD_WAITING) {
980 res = thread_block(THREAD_CONTINUE_NULL);
981 slept++;
982 }
983 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
984 } else {
985 word.want_excl = 1;
986 ordered_store_rw(lock, word.data);
987 lck_interlock_unlock(lock, istate);
988 break;
989 }
990 }
991 }
992
993 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
994 assert(lock_pause);
995 return FALSE;
996 }
997
998 /*
999 * Wait for readers (and upgrades) to finish...
1000 */
1001 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
1002 #if CONFIG_DTRACE
1003 /*
1004 * Either sleeping or spinning is happening, start
1005 * a timing of our delay interval now. If we set it
1006 * to -1 we don't have accurate data so we cannot later
1007 * decide to record a dtrace spin or sleep event.
1008 */
1009 if (dtrace_ls_initialized == FALSE) {
1010 dtrace_ls_initialized = TRUE;
1011 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1012 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1013 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1014 if (dtrace_ls_enabled) {
1015 /*
1016 * Either sleeping or spinning is happening,
1017 * start a timing of our delay interval now.
1018 */
1019 readers_at_sleep = lock->lck_rw_shared_count;
1020 wait_interval = mach_absolute_time();
1021 }
1022 }
1023 #endif
1024
1025 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1026
1027 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE, lock_pause);
1028
1029 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, drain_state, 0);
1030
1031 if (drain_state == LCK_RW_DRAIN_S_DRAINED ||
1032 drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1033 break;
1034 }
1035 /*
1036 * if we get here, the deadline has expired w/o us
1037 * being able to grab the lock exclusively
1038 * check to see if we're allowed to do a thread_block
1039 */
1040 word.data = ordered_load_rw(lock);
1041 if (word.can_sleep) {
1042 istate = lck_interlock_lock(lock);
1043 word.data = ordered_load_rw(lock);
1044
1045 if (word.shared_count != 0 || word.want_upgrade) {
1046 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1047
1048 word.w_waiting = 1;
1049 ordered_store_rw(lock, word.data);
1050
1051 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1052 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1053 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1054 lck_interlock_unlock(lock, istate);
1055
1056 if (res == THREAD_WAITING) {
1057 res = thread_block(THREAD_CONTINUE_NULL);
1058 slept++;
1059 }
1060 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1061 } else {
1062 lck_interlock_unlock(lock, istate);
1063 /*
1064 * must own the lock now, since we checked for
1065 * readers or upgrade owner behind the interlock
1066 * no need for a call to 'lck_rw_drain_status'
1067 */
1068 break;
1069 }
1070 }
1071 }
1072
1073 #if CONFIG_DTRACE
1074 /*
1075 * Decide what latencies we suffered that are Dtrace events.
1076 * If we have set wait_interval, then we either spun or slept.
1077 * At least we get out from under the interlock before we record
1078 * which is the best we can do here to minimize the impact
1079 * of the tracing.
1080 * If we have set wait_interval to -1, then dtrace was not enabled when we
1081 * started sleeping/spinning so we don't record this event.
1082 */
1083 if (dtrace_ls_enabled == TRUE) {
1084 if (slept == 0) {
1085 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1086 mach_absolute_time() - wait_interval, 1);
1087 } else {
1088 /*
1089 * For the blocking case, we also record whether, when we blocked,
1090 * it was held for read or write, and how many readers there were.
1091 * Notice that above we recorded this before we dropped
1092 * the interlock so the count is accurate.
1093 */
1094 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1095 mach_absolute_time() - wait_interval, 1,
1096 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1097 }
1098 }
1099 #endif /* CONFIG_DTRACE */
1100
1101 if (drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1102 lck_rw_drop(lock, LCK_RW_GRAB_F_WANT_EXCL);
1103 assert(lock_pause);
1104 return FALSE;
1105 }
1106
1107 #if CONFIG_DTRACE
1108 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1109 #endif /* CONFIG_DTRACE */
1110
1111 return TRUE;
1112 }
1113
1114 static inline void
1115 lck_rw_lock_check_preemption(lck_rw_t *lock __unused)
1116 {
1117 assertf((get_preemption_level() == 0 && ml_get_interrupts_enabled()) ||
1118 startup_phase < STARTUP_SUB_EARLY_BOOT ||
1119 current_cpu_datap()->cpu_hibernate ||
1120 ml_is_quiescing() ||
1121 !not_in_kdp,
1122 "%s: attempt to take rwlock %p in non-preemptible or interrupt context: "
1123 "preemption level = %d, interruptible = %d", __func__, lock,
1124 get_preemption_level(), (int)ml_get_interrupts_enabled());
1125 }
1126
1127 #define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
1128 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
1129 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
1130 /*!
1131 * @function lck_rw_lock_exclusive_check_contended
1132 *
1133 * @abstract
1134 * Locks a rw_lock in exclusive mode.
1135 *
1136 * @discussion
1137 * This routine IS EXPERIMENTAL.
1138 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
1139 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
1140 *
1141 * @param lock rw_lock to lock.
1142 *
1143 * @returns Returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
1144 * otherwise.
1145 */
1146 bool
1147 lck_rw_lock_exclusive_check_contended(
1148 lck_rw_t *lock)
1149 {
1150 thread_t thread = current_thread();
1151 bool contended = false;
1152
1153 if (lock->lck_rw_can_sleep) {
1154 lck_rw_lock_check_preemption(lock);
1155 lck_rw_lock_count_inc(thread, lock);
1156 } else if (get_preemption_level() == 0) {
1157 panic("Taking non-sleepable RW lock with preemption enabled");
1158 }
1159
1160 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1161 #if CONFIG_DTRACE
1162 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1163 #endif /* CONFIG_DTRACE */
1164 } else {
1165 contended = true;
1166 (void) lck_rw_lock_exclusive_gen(lock, NULL);
1167 }
1168 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1169 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1170 ordered_store_rw_owner(lock, thread->ctid);
1171
1172 #ifdef DEBUG_RW
1173 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
1174 #endif /* DEBUG_RW */
1175 return contended;
1176 }
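
/*
 * Illustrative usage sketch for the contention heuristic above (the policy
 * and field names are made up; only the vm object lock uses this interface
 * today):
 *
 *      if (lck_rw_lock_exclusive_check_contended(&obj->lock)) {
 *              // we spun or blocked to take the lock; a caller could use
 *              // this hint, e.g. to batch more work per acquisition
 *              obj->contended_acquisitions++;
 *      }
 *      // ... exclusive critical section ...
 *      lck_rw_unlock_exclusive(&obj->lock);
 */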
1177
1178 __attribute__((always_inline))
1179 static boolean_t
1180 lck_rw_lock_exclusive_internal_inline(
1181 lck_rw_t *lock,
1182 void *caller,
1183 bool (^lock_pause)(void))
1184 {
1185 #pragma unused(caller)
1186 thread_t thread = current_thread();
1187
1188 if (lock->lck_rw_can_sleep) {
1189 lck_rw_lock_check_preemption(lock);
1190 lck_rw_lock_count_inc(thread, lock);
1191 } else if (get_preemption_level() == 0) {
1192 panic("Taking non-sleepable RW lock with preemption enabled");
1193 }
1194
1195 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1196 #if CONFIG_DTRACE
1197 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1198 #endif /* CONFIG_DTRACE */
1199 } else if (!lck_rw_lock_exclusive_gen(lock, lock_pause)) {
1200 /*
1201 * lck_rw_lock_exclusive_gen() should only return
1202 * early if lock_pause has been passed and
1203 * returns FALSE. lock_pause is exclusive with
1204 * lck_rw_can_sleep().
1205 */
1206 assert(!lock->lck_rw_can_sleep);
1207 return FALSE;
1208 }
1209
1210 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1211 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1212 ordered_store_rw_owner(lock, thread->ctid);
1213
1214 #if DEBUG_RW
1215 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1216 #endif /* DEBUG_RW */
1217
1218 return TRUE;
1219 }
1220
1221 __attribute__((noinline))
1222 static void
1223 lck_rw_lock_exclusive_internal(
1224 lck_rw_t *lock,
1225 void *caller)
1226 {
1227 (void) lck_rw_lock_exclusive_internal_inline(lock, caller, NULL);
1228 }
1229
1230 /*!
1231 * @function lck_rw_lock_exclusive
1232 *
1233 * @abstract
1234 * Locks a rw_lock in exclusive mode.
1235 *
1236 * @discussion
1237 * This function can block.
1238 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1239 * can acquire it in exclusive mode.
1240 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1241 *
1242 * @param lock rw_lock to lock.
1243 */
1244 __mockable
1245 void
1246 lck_rw_lock_exclusive(
1247 lck_rw_t *lock)
1248 {
1249 (void) lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), NULL);
1250 }
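
/*
 * Minimal usage sketch (the table structure is made up):
 *
 *      lck_rw_lock_exclusive(&table->lock);
 *      lck_rw_assert(&table->lock, LCK_RW_ASSERT_EXCLUSIVE);
 *      table->generation++;            // writer-side update
 *      lck_rw_unlock_exclusive(&table->lock);
 */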
1251
1252 /*!
1253 * @function lck_rw_lock_exclusive_b
1254 *
1255 * @abstract
1256 * Locks a rw_lock in exclusive mode. Returns early if the lock can't be acquired
1257 * and the specified block returns true.
1258 *
1259 * @discussion
1260 * Identical to lck_rw_lock_exclusive() but can return early if the lock can't be
1261 * acquired and the specified block returns true. The block is called
1262 * repeatedly when waiting to acquire the lock.
1263 * Should only be called when the lock cannot sleep (i.e. when
1264 * lock->lck_rw_can_sleep is false).
1265 *
1266 * @param lock rw_lock to lock.
1267 * @param lock_pause block invoked while waiting to acquire lock
1268 *
1269 * @returns Returns TRUE if the lock is successfully taken,
1270 * FALSE if the block returns true and the lock has
1271 * not been acquired.
1272 */
1273 boolean_t
1274 lck_rw_lock_exclusive_b(
1275 lck_rw_t *lock,
1276 bool (^lock_pause)(void))
1277 {
1278 assert(!lock->lck_rw_can_sleep);
1279
1280 return lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), lock_pause);
1281 }
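
/*
 * Illustrative sketch: bounded acquisition of a non-sleepable lock that bails
 * out if, say, a shutdown flag is raised while spinning (the device structure
 * and flag are made up):
 *
 *      if (!lck_rw_lock_exclusive_b(&dev->reg_lock, ^bool (void) {
 *              return os_atomic_load(&dev->shutting_down, relaxed);
 *      })) {
 *              // the block returned true before the lock could be taken
 *              return;
 *      }
 *      // ... lock held in exclusive mode here ...
 *      lck_rw_unlock_exclusive(&dev->reg_lock);
 */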
1282
1283 /*
1284 * Routine: lck_rw_lock_shared_gen
1285 * Function:
1286 * Fast path code has determined that this lock
1287 * is held exclusively... this is where we spin/block
1288 * until we can acquire the lock in the shared mode
1289 */
1290 static boolean_t
1291 lck_rw_lock_shared_gen(
1292 lck_rw_t *lck,
1293 bool (^lock_pause)(void))
1294 {
1295 __assert_only thread_t self = current_thread();
1296 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1297 lck_rw_word_t word;
1298 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
1299 int slept = 0;
1300 wait_result_t res = 0;
1301 boolean_t istate;
1302
1303 #if CONFIG_DTRACE
1304 uint64_t wait_interval = 0;
1305 int readers_at_sleep = 0;
1306 boolean_t dtrace_ls_initialized = FALSE;
1307 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1308 #endif /* CONFIG_DTRACE */
1309
1310 assertf(lck->lck_rw_owner != self->ctid,
1311 "Lock already held state=0x%x, owner=%p",
1312 ordered_load_rw(lck), self);
1313
1314 #ifdef DEBUG_RW
1315 /*
1316 * Best effort attempt to check that this thread
1317 * is not already holding the lock in shared mode.
1318 */
1319 assert_canlock_rwlock(lck, self, LCK_RW_TYPE_SHARED);
1320 #endif
1321
1322 while (lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED, NULL) != LCK_RW_GRAB_S_LOCKED) {
1323 #if CONFIG_DTRACE
1324 if (dtrace_ls_initialized == FALSE) {
1325 dtrace_ls_initialized = TRUE;
1326 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1327 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1328 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1329 if (dtrace_ls_enabled) {
1330 /*
1331 * Either sleeping or spinning is happening,
1332 * start a timing of our delay interval now.
1333 */
1334 readers_at_sleep = lck->lck_rw_shared_count;
1335 wait_interval = mach_absolute_time();
1336 }
1337 }
1338 #endif
1339
1340 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1341 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
1342
1343 grab_state = lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED | LCK_RW_GRAB_F_WAIT, lock_pause);
1344
1345 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1346 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, grab_state, 0);
1347
1348 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
1349 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1350 break;
1351 }
1352
1353 /*
1354 * if we get here, the deadline has expired w/o us
1355 * being able to grab the lock for read
1356 * check to see if we're allowed to do a thread_block
1357 */
1358 if (lck->lck_rw_can_sleep) {
1359 istate = lck_interlock_lock(lck);
1360
1361 word.data = ordered_load_rw(lck);
1362 if ((word.want_excl || word.want_upgrade) &&
1363 ((word.shared_count == 0) || word.priv_excl)) {
1364 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1365 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
1366
1367 word.r_waiting = 1;
1368 ordered_store_rw(lck, word.data);
1369
1370 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
1371 res = assert_wait(LCK_RW_READER_EVENT(lck),
1372 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1373 lck_interlock_unlock(lck, istate);
1374
1375 if (res == THREAD_WAITING) {
1376 res = thread_block(THREAD_CONTINUE_NULL);
1377 slept++;
1378 }
1379 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1380 trace_lck, res, slept, 0, 0);
1381 } else {
1382 word.shared_count++;
1383 ordered_store_rw(lck, word.data);
1384 lck_interlock_unlock(lck, istate);
1385 break;
1386 }
1387 }
1388 }
1389
1390 #if CONFIG_DTRACE
1391 if (dtrace_ls_enabled == TRUE) {
1392 if (slept == 0) {
1393 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1394 } else {
1395 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1396 mach_absolute_time() - wait_interval, 0,
1397 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1398 }
1399 }
1400 #endif /* CONFIG_DTRACE */
1401
1402 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1403 assert(lock_pause);
1404 return FALSE;
1405 }
1406
1407 #if CONFIG_DTRACE
1408 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1409 #endif /* CONFIG_DTRACE */
1410
1411 return TRUE;
1412 }
1413
1414 __attribute__((always_inline))
1415 static boolean_t
1416 lck_rw_lock_shared_internal_inline(
1417 lck_rw_t *lock,
1418 void *caller,
1419 bool (^lock_pause)(void))
1420 {
1421 #pragma unused(caller)
1422
1423 uint32_t data, prev;
1424 thread_t thread = current_thread();
1425 #ifdef DEBUG_RW
1426 boolean_t check_canlock = TRUE;
1427 #endif
1428
1429 if (lock->lck_rw_can_sleep) {
1430 lck_rw_lock_check_preemption(lock);
1431 lck_rw_lock_count_inc(thread, lock);
1432 } else if (get_preemption_level() == 0) {
1433 panic("Taking non-sleepable RW lock with preemption enabled");
1434 }
1435
1436 for (;;) {
1437 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1438 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1439 atomic_exchange_abort();
1440 if (!lck_rw_lock_shared_gen(lock, lock_pause)) {
1441 /*
1442 * lck_rw_lock_shared_gen() should only return
1443 * early if lock_pause has been passed and
1444 * returns FALSE. lock_pause is exclusive with
1445 * lck_rw_can_sleep().
1446 */
1447 assert(!lock->lck_rw_can_sleep);
1448 return FALSE;
1449 }
1450
1451 goto locked;
1452 }
1453 #ifdef DEBUG_RW
1454 if ((data & LCK_RW_SHARED_MASK) == 0) {
1455 /*
1456 * If the lock is uncontended,
1457 * we do not need to check if we can lock it
1458 */
1459 check_canlock = FALSE;
1460 }
1461 #endif
1462 data += LCK_RW_SHARED_READER;
1463 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1464 break;
1465 }
1466 cpu_pause();
1467 }
1468 #ifdef DEBUG_RW
1469 if (check_canlock) {
1470 /*
1471 * Best effort attempt to check that this thread
1472 * is not already holding the lock (this checks read mode too).
1473 */
1474 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1475 }
1476 #endif
1477 locked:
1478 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1479 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1480
1481 #if CONFIG_DTRACE
1482 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1483 #endif /* CONFIG_DTRACE */
1484
1485 #ifdef DEBUG_RW
1486 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1487 #endif /* DEBUG_RW */
1488
1489 return TRUE;
1490 }
1491
1492 __attribute__((noinline))
1493 static void
1494 lck_rw_lock_shared_internal(
1495 lck_rw_t *lock,
1496 void *caller)
1497 {
1498 (void) lck_rw_lock_shared_internal_inline(lock, caller, NULL);
1499 }
1500
1501 /*!
1502 * @function lck_rw_lock_shared
1503 *
1504 * @abstract
1505 * Locks a rw_lock in shared mode.
1506 *
1507 * @discussion
1508 * This function can block.
1509 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1510 * can acquire it in exclusive mode.
1511 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
1512 * the lock without waiting.
1513 * If the lock is held in shared mode and there is at least one writer waiting, a reader will wait
1514 * for the writers to make progress if the lock was initialized with the default settings. If instead
1515 * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
1516 * in shared mode.
1517 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1518 *
1519 * @param lock rw_lock to lock.
1520 */
1521 __mockable
1522 void
1523 lck_rw_lock_shared(
1524 lck_rw_t *lock)
1525 {
1526 (void) lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), NULL);
1527 }
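
/*
 * Illustrative reader sketch, including the retry needed when an upgrade
 * attempt fails (on failure lck_rw_lock_shared_to_exclusive() drops the lock
 * entirely, see below; the cache structure and helpers are made up):
 *
 *      lck_rw_lock_shared(&cache->lock);
 *      entry = cache_lookup_locked(cache, key);        // read-only access
 *      if (entry == NULL) {
 *              if (!lck_rw_lock_shared_to_exclusive(&cache->lock)) {
 *                      // upgrade failed: the lock is no longer held
 *                      lck_rw_lock_exclusive(&cache->lock);
 *              }
 *              entry = cache_insert_locked(cache, key);
 *              lck_rw_unlock_exclusive(&cache->lock);
 *      } else {
 *              lck_rw_unlock_shared(&cache->lock);
 *      }
 */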
1528
1529 /*!
1530 * @function lck_rw_lock_shared_b
1531 *
1532 * @abstract
1533 * Locks a rw_lock in shared mode. Returns early if the lock can't be acquired
1534 * and the specified block returns true.
1535 *
1536 * @discussion
1537 * Identical to lck_rw_lock_shared() but can return early if the lock can't be
1538 * acquired and the specified block returns true. The block is called
1539 * repeatedly when waiting to acquire the lock.
1540 * Should only be called when the lock cannot sleep (i.e. when
1541 * lock->lck_rw_can_sleep is false).
1542 *
1543 * @param lock rw_lock to lock.
1544 * @param lock_pause block invoked while waiting to acquire lock
1545 *
1546 * @returns Returns TRUE if the lock is successfully taken,
1547 * FALSE if the block returns true and the lock has
1548 * not been acquired.
1549 */
1550 boolean_t
1551 lck_rw_lock_shared_b(
1552 lck_rw_t *lock,
1553 bool (^lock_pause)(void))
1554 {
1555 assert(!lock->lck_rw_can_sleep);
1556
1557 return lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), lock_pause);
1558 }
1559
1560 /*
1561 * Routine: lck_rw_lock_shared_to_exclusive_failure
1562 * Function:
1563 * Fast path code has already dropped our read
1564 * count and determined that someone else owns 'lck_rw_want_upgrade'
1565 * if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting';
1566 * all we need to do here is determine if a wakeup is needed
1567 */
1568 static boolean_t
1569 lck_rw_lock_shared_to_exclusive_failure(
1570 lck_rw_t *lck,
1571 uint32_t prior_lock_state)
1572 {
1573 thread_t thread = current_thread();
1574
1575 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1576 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1577 /*
1578 * Someone else has requested upgrade.
1579 * Since we've released the read lock, wake
1580 * him up if he's blocked waiting
1581 */
1582 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1583 }
1584
1585 /* Check if dropping the lock means that we need to unpromote */
1586 if (lck->lck_rw_can_sleep) {
1587 lck_rw_lock_count_dec(thread, lck);
1588 }
1589
1590 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1591 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1592
1593 #ifdef DEBUG_RW
1594 remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
1595 #endif /* DEBUG_RW */
1596
1597 return FALSE;
1598 }
1599
1600 /*
1601 * Routine: lck_rw_lock_shared_to_exclusive_success
1602 * Function:
1603 * the fast path code has already dropped our read
1604 * count and successfully acquired 'lck_rw_want_upgrade'
1605 * we just need to wait for the rest of the readers to drain
1606 * and then we can return as the exclusive holder of this lock
1607 */
1608 static void
1609 lck_rw_lock_shared_to_exclusive_success(
1610 lck_rw_t *lock)
1611 {
1612 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1613 int slept = 0;
1614 lck_rw_word_t word;
1615 wait_result_t res;
1616 boolean_t istate;
1617 lck_rw_drain_state_t drain_state;
1618
1619 #if CONFIG_DTRACE
1620 uint64_t wait_interval = 0;
1621 int readers_at_sleep = 0;
1622 boolean_t dtrace_ls_initialized = FALSE;
1623 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1624 #endif
1625
1626 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
1627 word.data = ordered_load_rw(lock);
1628 #if CONFIG_DTRACE
1629 if (dtrace_ls_initialized == FALSE) {
1630 dtrace_ls_initialized = TRUE;
1631 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1632 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1633 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1634 if (dtrace_ls_enabled) {
1635 /*
1636 * Either sleeping or spinning is happening,
1637 * start a timing of our delay interval now.
1638 */
1639 readers_at_sleep = word.shared_count;
1640 wait_interval = mach_absolute_time();
1641 }
1642 }
1643 #endif
1644
1645 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1646 trace_lck, word.shared_count, 0, 0, 0);
1647
1648 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE, NULL);
1649
1650 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1651 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1652
1653 if (drain_state == LCK_RW_DRAIN_S_DRAINED) {
1654 break;
1655 }
1656
1657 /*
1658 * if we get here, the spin deadline in lck_rw_drain_status()
1659 * has expired w/o the rw_shared_count having drained to 0
1660 * check to see if we're allowed to do a thread_block
1661 */
1662 if (word.can_sleep) {
1663 istate = lck_interlock_lock(lock);
1664
1665 word.data = ordered_load_rw(lock);
1666 if (word.shared_count != 0) {
1667 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1668 trace_lck, word.shared_count, 0, 0, 0);
1669
1670 word.w_waiting = 1;
1671 ordered_store_rw(lock, word.data);
1672
1673 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1674 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1675 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1676 lck_interlock_unlock(lock, istate);
1677
1678 if (res == THREAD_WAITING) {
1679 res = thread_block(THREAD_CONTINUE_NULL);
1680 slept++;
1681 }
1682 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1683 trace_lck, res, slept, 0, 0);
1684 } else {
1685 lck_interlock_unlock(lock, istate);
1686 break;
1687 }
1688 }
1689 }
1690 #if CONFIG_DTRACE
1691 /*
1692 * Record whether we spun or slept above: 'slept' is non-zero only if we went through thread_block().
1693 */
1694 if (dtrace_ls_enabled == TRUE) {
1695 if (slept == 0) {
1696 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1697 } else {
1698 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1699 mach_absolute_time() - wait_interval, 1,
1700 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1701 }
1702 }
1703 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1704 #endif
1705 }
1706
1707 /*!
1708 * @function lck_rw_lock_shared_to_exclusive
1709 *
1710 * @abstract
1711 * Upgrades a rw_lock held in shared mode to exclusive.
1712 *
1713 * @discussion
1714 * This function can block.
1715 * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails, the function
1716 * returns with the lock not held.
1717 * The caller needs to hold the lock in shared mode to upgrade it.
1718 *
1719 * @param lock rw_lock already held in shared mode to upgrade.
1720 *
1721 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
1722 * If the function was not able to upgrade the lock, the lock will be dropped
1723 * by the function.
1724 */
1725 __mockable
1726 boolean_t
1727 lck_rw_lock_shared_to_exclusive(
1728 lck_rw_t *lock)
1729 {
1730 thread_t thread = current_thread();
1731 uint32_t data, prev;
1732
1733 assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());
1734
1735 #if DEBUG_RW
1736 assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1737 #endif /* DEBUG_RW */
1738
1739 for (;;) {
1740 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1741 if (data & LCK_RW_INTERLOCK) {
1742 atomic_exchange_abort();
1743 lck_rw_interlock_spin(lock);
1744 continue;
1745 }
1746 if (data & LCK_RW_WANT_UPGRADE) {
1747 data -= LCK_RW_SHARED_READER;
1748 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1749 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1750 }
1751 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1752 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1753 }
1754 } else {
1755 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1756 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1757 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1758 break;
1759 }
1760 }
1761 cpu_pause();
1762 }
1763 /* we now own the WANT_UPGRADE */
1764 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1765 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1766 }
1767
1768 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1769 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1770
1771 ordered_store_rw_owner(lock, thread->ctid);
1772 #if CONFIG_DTRACE
1773 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1774 #endif /* CONFIG_DTRACE */
1775
1776 #if DEBUG_RW
1777 change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
1778 #endif /* DEBUG_RW */
1779 return TRUE;
1780 }
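
/*
 * Illustrative usage sketch of the upgrade path (the 'cache' object, its
 * 'c_lock' field and the needs_rebuild()/rebuild() helpers are hypothetical).
 * The point being shown is that a failed upgrade returns with the lock no
 * longer held, so the caller must re-acquire and re-validate before touching
 * protected state:
 *
 *	lck_rw_lock_shared(&cache->c_lock);
 *	if (needs_rebuild(cache)) {
 *		if (!lck_rw_lock_shared_to_exclusive(&cache->c_lock)) {
 *			// upgrade failed: our shared hold was dropped
 *			lck_rw_lock_exclusive(&cache->c_lock);
 *		}
 *		// re-check: the state may have changed while unlocked
 *		if (needs_rebuild(cache)) {
 *			rebuild(cache);
 *		}
 *		lck_rw_unlock_exclusive(&cache->c_lock);
 *	} else {
 *		lck_rw_unlock_shared(&cache->c_lock);
 *	}
 */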
1781
1782 /*
1783 * Routine: lck_rw_lock_exclusive_to_shared_gen
1784 * Function:
1785 * Fast path has already dropped
1786 * our exclusive state and bumped lck_rw_shared_count
1787 * all we need to do here is determine if anyone
1788 * needs to be awakened.
1789 */
1790 static void
1791 lck_rw_lock_exclusive_to_shared_gen(
1792 lck_rw_t *lck,
1793 uint32_t prior_lock_state,
1794 void *caller)
1795 {
1796 #pragma unused(caller)
1797 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1798 lck_rw_word_t fake_lck;
1799
1800 /*
1801 * prior_lock state is a snapshot of the 1st word of the
1802 * lock in question... we'll fake up a pointer to it
1803 * and carefully not access anything beyond what's defined
1804 * in the first word of a lck_rw_t
1805 */
1806 fake_lck.data = prior_lock_state;
1807
1808 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1809 trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1810
1811 /*
1812 * don't wake up anyone waiting to take the lock exclusively
1813 * since we hold a read count... when the read count drops to 0,
1814 * the writers will be woken.
1815 *
1816 * wake up any waiting readers if we don't have any writers waiting,
1817 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1818 */
1819 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1820 thread_wakeup(LCK_RW_READER_EVENT(lck));
1821 }
1822
1823 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1824 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1825
1826 #if CONFIG_DTRACE
1827 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1828 #endif
1829
1830 #if DEBUG_RW
1831 thread_t thread = current_thread();
1832 change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1833 #endif /* DEBUG_RW */
1834 }
1835
1836 /*!
1837 * @function lck_rw_lock_exclusive_to_shared
1838 *
1839 * @abstract
1840 * Downgrades a rw_lock held in exclusive mode to shared.
1841 *
1842 * @discussion
1843 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
1844 *
1845 * @param lock rw_lock already held in exclusive mode to downgrade.
1846 */
1847 __mockable
1848 void
1849 lck_rw_lock_exclusive_to_shared(
1850 lck_rw_t *lock)
1851 {
1852 uint32_t data, prev;
1853
1854 assertf(lock->lck_rw_owner == current_thread()->ctid,
1855 "state=0x%x, owner=%p", lock->lck_rw_data,
1856 ctid_get_thread_unsafe(lock->lck_rw_owner));
1857 ordered_store_rw_owner(lock, 0);
1858
1859 for (;;) {
1860 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1861 if (data & LCK_RW_INTERLOCK) {
1862 atomic_exchange_abort();
1863 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1864 continue;
1865 }
1866 data += LCK_RW_SHARED_READER;
1867 if (data & LCK_RW_WANT_UPGRADE) {
1868 data &= ~(LCK_RW_WANT_UPGRADE);
1869 } else {
1870 data &= ~(LCK_RW_WANT_EXCL);
1871 }
1872 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
1873 data &= ~(LCK_RW_W_WAITING);
1874 }
1875 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1876 break;
1877 }
1878 cpu_pause();
1879 }
1880 lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
1881 }
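
/*
 * Illustrative usage sketch of the downgrade path (the 'table' object, its
 * 't_lock' field and the insert_entry()/consume_entries() helpers are
 * hypothetical): a writer that has finished mutating can downgrade to shared
 * mode so that other readers may proceed while it keeps reading the state it
 * just published.
 *
 *	lck_rw_lock_exclusive(&table->t_lock);
 *	insert_entry(table, entry);
 *	lck_rw_lock_exclusive_to_shared(&table->t_lock);
 *	consume_entries(table);                    // read-only from here on
 *	lck_rw_unlock_shared(&table->t_lock);
 */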
1882
1883 /*
1884 * Very sad hack, but the codegen for lck_rw_lock
1885 * is very unhappy with the combination of __builtin_return_address()
1886 * and a noreturn function. For some reason it adds more frames
1887 * than it should. rdar://76570684
1888 */
1889 void
1890 _lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
1891 #pragma clang diagnostic push
1892 #pragma clang diagnostic ignored "-Wmissing-noreturn"
1893 __attribute__((noinline, weak))
1894 void
1895 _lck_rw_lock_type_panic(
1896 lck_rw_t *lck,
1897 lck_rw_type_t lck_rw_type)
1898 {
1899 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
1900 }
1901 #pragma clang diagnostic pop
1902
1903 /*!
1904 * @function lck_rw_lock
1905 *
1906 * @abstract
1907 * Locks a rw_lock with the specified type.
1908 *
1909 * @discussion
1910 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1911 *
1912 * @param lck rw_lock to lock.
1913 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1914 */
1915 __mockable
1916 void
1917 lck_rw_lock(
1918 lck_rw_t *lck,
1919 lck_rw_type_t lck_rw_type)
1920 {
1921 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1922 return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
1923 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1924 return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
1925 }
1926 _lck_rw_lock_type_panic(lck, lck_rw_type);
1927 }
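
/*
 * Illustrative usage sketch (the 'obj' object and the 'want_write' flag are
 * hypothetical): the typed entry point is convenient when the mode is chosen
 * at runtime; the matching lck_rw_unlock() must use the same type unless an
 * upgrade/downgrade happened while the lock was held.
 *
 *	lck_rw_type_t mode = want_write ? LCK_RW_TYPE_EXCLUSIVE : LCK_RW_TYPE_SHARED;
 *
 *	lck_rw_lock(&obj->lock, mode);
 *	...
 *	lck_rw_unlock(&obj->lock, mode);
 */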
1928
1929 __attribute__((always_inline))
1930 static boolean_t
1931 lck_rw_try_lock_shared_internal_inline(
1932 lck_rw_t *lock,
1933 void *caller)
1934 {
1935 #pragma unused(caller)
1936
1937 uint32_t data, prev;
1938 thread_t thread = current_thread();
1939 #ifdef DEBUG_RW
1940 boolean_t check_canlock = TRUE;
1941 #endif
1942
1943 for (;;) {
1944 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1945 if (data & LCK_RW_INTERLOCK) {
1946 atomic_exchange_abort();
1947 lck_rw_interlock_spin(lock);
1948 continue;
1949 }
1950 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1951 atomic_exchange_abort();
1952 return FALSE; /* lock is busy */
1953 }
1954 #ifdef DEBUG_RW
1955 if ((data & LCK_RW_SHARED_MASK) == 0) {
1956 /*
1957 * If the lock is uncontended,
1958 * we do not need to check if we can lock it
1959 */
1960 check_canlock = FALSE;
1961 }
1962 #endif
1963 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1964 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1965 break;
1966 }
1967 cpu_pause();
1968 }
1969 #ifdef DEBUG_RW
1970 if (check_canlock) {
1971 /*
1972 * Best effort attempt to check that this thread
1973 * is not already holding the lock (this checks read mode too).
1974 */
1975 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1976 }
1977 #endif
1978 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1979 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1980
1981 if (lock->lck_rw_can_sleep) {
1982 lck_rw_lock_count_inc(thread, lock);
1983 } else if (get_preemption_level() == 0) {
1984 panic("Taking non-sleepable RW lock with preemption enabled");
1985 }
1986
1987 #if CONFIG_DTRACE
1988 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1989 #endif /* CONFIG_DTRACE */
1990
1991 #ifdef DEBUG_RW
1992 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1993 #endif /* DEBUG_RW */
1994 return TRUE;
1995 }
1996
1997 __attribute__((noinline))
1998 static boolean_t
1999 lck_rw_try_lock_shared_internal(
2000 lck_rw_t *lock,
2001 void *caller)
2002 {
2003 return lck_rw_try_lock_shared_internal_inline(lock, caller);
2004 }
2005
2006 /*!
2007 * @function lck_rw_try_lock_shared
2008 *
2009 * @abstract
2010 * Tries to lock a rw_lock in read mode.
2011 *
2012 * @discussion
2013 * This function returns immediately, without blocking, when the lock cannot be acquired.
2014 * See lck_rw_lock_shared for more details.
2015 *
2016 * @param lock rw_lock to lock.
2017 *
2018 * @returns TRUE if the lock is successfully acquired, FALSE if a writer holds or is waiting for the lock, or an upgrade is in progress.
2019 */
2020 __mockable
2021 boolean_t
2022 lck_rw_try_lock_shared(
2023 lck_rw_t *lock)
2024 {
2025 return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
2026 }
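
/*
 * Illustrative usage sketch (the 'stats' object and update_snapshot() helper
 * are hypothetical): a try-lock is useful on paths that must not block,
 * skipping the work when a writer currently holds or is waiting for the lock.
 *
 *	if (lck_rw_try_lock_shared(&stats->lock)) {
 *		update_snapshot(stats);
 *		lck_rw_unlock_shared(&stats->lock);
 *	} else {
 *		// a writer holds the lock (or an upgrade is pending); try again later
 *	}
 */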
2027
2028 __attribute__((always_inline))
2029 static boolean_t
2030 lck_rw_try_lock_exclusive_internal_inline(
2031 lck_rw_t *lock,
2032 void *caller)
2033 {
2034 #pragma unused(caller)
2035 uint32_t data, prev;
2036
2037 for (;;) {
2038 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
2039 if (data & LCK_RW_INTERLOCK) {
2040 atomic_exchange_abort();
2041 lck_rw_interlock_spin(lock);
2042 continue;
2043 }
2044 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
2045 atomic_exchange_abort();
2046 return FALSE;
2047 }
2048 data |= LCK_RW_WANT_EXCL;
2049 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
2050 break;
2051 }
2052 cpu_pause();
2053 }
2054 thread_t thread = current_thread();
2055
2056 if (lock->lck_rw_can_sleep) {
2057 lck_rw_lock_count_inc(thread, lock);
2058 } else if (get_preemption_level() == 0) {
2059 panic("Taking non-sleepable RW lock with preemption enabled");
2060 }
2061
2062 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
2063 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
2064
2065 ordered_store_rw_owner(lock, thread->ctid);
2066 #if CONFIG_DTRACE
2067 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
2068 #endif /* CONFIG_DTRACE */
2069
2070 #ifdef DEBUG_RW
2071 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
2072 #endif /* DEBUG_RW */
2073 return TRUE;
2074 }
2075
2076 __attribute__((noinline))
2077 static boolean_t
2078 lck_rw_try_lock_exclusive_internal(
2079 lck_rw_t *lock,
2080 void *caller)
2081 {
2082 return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
2083 }
2084
2085 /*!
2086 * @function lck_rw_try_lock_exclusive
2087 *
2088 * @abstract
2089 * Tries to lock a rw_lock in write mode.
2090 *
2091 * @discussion
2092 * This function returns immediately, without blocking, when the lock cannot be acquired.
2093 * See lck_rw_lock_exclusive for more details.
2094 *
2095 * @param lock rw_lock to lock.
2096 *
2097 * @returns TRUE if the lock is successfully acquired, FALSE if it is already held in any mode or a writer or upgrader is already waiting.
2098 */
2099 __mockable
2100 boolean_t
2101 lck_rw_try_lock_exclusive(
2102 lck_rw_t *lock)
2103 {
2104 return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
2105 }
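
/*
 * Illustrative usage sketch (the 'cache' object and the trim()/defer_trim()
 * helpers are hypothetical): an opportunistic writer can attempt the exclusive
 * lock and defer its work rather than block when the lock is busy in any mode.
 *
 *	if (lck_rw_try_lock_exclusive(&cache->lock)) {
 *		trim(cache);
 *		lck_rw_unlock_exclusive(&cache->lock);
 *	} else {
 *		defer_trim(cache);
 *	}
 */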
2106
2107 /*
2108 * Very sad hack, but the codegen for lck_rw_try_lock
2109 * is very unhappy with the combination of __builtin_return_address()
2110 * and a noreturn function. For some reason it adds more frames
2111 * than it should. rdar://76570684
2112 */
2113 boolean_t
2114 _lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
2115 #pragma clang diagnostic push
2116 #pragma clang diagnostic ignored "-Wmissing-noreturn"
2117 __attribute__((noinline, weak))
2118 boolean_t
2119 _lck_rw_try_lock_type_panic(
2120 lck_rw_t *lck,
2121 lck_rw_type_t lck_rw_type)
2122 {
2123 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
2124 }
2125 #pragma clang diagnostic pop
2126
2127 /*!
2128 * @function lck_rw_try_lock
2129 *
2130 * @abstract
2131 * Tries to lock a rw_lock with the specified type.
2132 *
2133 * @discussion
2134 * This function returns immediately, without waiting or blocking, when the lock cannot be acquired.
2135 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
2136 *
2137 * @param lck rw_lock to lock.
2138 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2139 *
2140 * @returns TRUE if the lock is successfully acquired, FALSE if it could not be acquired without blocking.
2141 */
2142 __mockable
2143 boolean_t
2144 lck_rw_try_lock(
2145 lck_rw_t *lck,
2146 lck_rw_type_t lck_rw_type)
2147 {
2148 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2149 return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
2150 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2151 return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
2152 }
2153 return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
2154 }
2155
2156 /*
2157 * Routine: lck_rw_done_gen
2158 *
2159 * prior_lock_state is the value in the 1st
2160 * word of the lock at the time of a successful
2161 * atomic compare and exchange with the new value...
2162 * it represents the state of the lock before we
2163 * decremented the rw_shared_count or cleared either
2164 * rw_want_upgrade or rw_want_write and
2165 * the lck_x_waiting bits... since the wrapper
2166 * routine has already changed the state atomically,
2167 * we just need to decide if we should
2168 * wake up anyone and what value to return... we do
2169 * this by examining the state of the lock before
2170 * we changed it
2171 */
2172 static lck_rw_type_t
2173 lck_rw_done_gen(
2174 lck_rw_t *lck,
2175 uint32_t prior_lock_state)
2176 {
2177 lck_rw_word_t fake_lck;
2178 lck_rw_type_t lock_type;
2179 thread_t thread;
2180
2181 /*
2182 * prior_lock state is a snapshot of the 1st word of the
2183 * lock in question... we'll fake up a pointer to it
2184 * and carefully not access anything beyond what's defined
2185 * in the first word of a lck_rw_t
2186 */
2187 fake_lck.data = prior_lock_state;
2188
2189 if (fake_lck.shared_count <= 1) {
2190 if (fake_lck.w_waiting) {
2191 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2192 }
2193
2194 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
2195 thread_wakeup(LCK_RW_READER_EVENT(lck));
2196 }
2197 }
2198 if (fake_lck.shared_count) {
2199 lock_type = LCK_RW_TYPE_SHARED;
2200 } else {
2201 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2202 }
2203
2204 /* Check if dropping the lock means that we need to unpromote */
2205 thread = current_thread();
2206 if (fake_lck.can_sleep) {
2207 lck_rw_lock_count_dec(thread, lck);
2208 }
2209
2210 #if CONFIG_DTRACE
2211 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2212 #endif
2213
2214 #ifdef DEBUG_RW
2215 remove_held_rwlock(lck, thread, lock_type);
2216 #endif /* DEBUG_RW */
2217 return lock_type;
2218 }
2219
2220 /*!
2221 * @function lck_rw_done
2222 *
2223 * @abstract
2224 * Force unlocks a rw_lock without consistency checks.
2225 *
2226 * @discussion
2227 * Prefer lck_rw_unlock_shared()/lck_rw_unlock_exclusive(), which verify the expected mode; use this only when the held mode is not statically known. The return value reports which mode was released.
2228 *
2229 * @param lock rw_lock to unlock.
2230 */
2231 __mockable
2232 lck_rw_type_t
2233 lck_rw_done(
2234 lck_rw_t *lock)
2235 {
2236 uint32_t data, prev;
2237 boolean_t once = FALSE;
2238
2239 #ifdef DEBUG_RW
2240 /*
2241 * Best effort attempt to check that this thread
2242 * is holding the lock.
2243 */
2244 thread_t thread = current_thread();
2245 assert_held_rwlock(lock, thread, 0);
2246 #endif /* DEBUG_RW */
2247 for (;;) {
2248 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2249 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2250 atomic_exchange_abort();
2251 lck_rw_interlock_spin(lock);
2252 continue;
2253 }
2254 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2255 assertf(lock->lck_rw_owner == 0,
2256 "state=0x%x, owner=%p", lock->lck_rw_data,
2257 ctid_get_thread_unsafe(lock->lck_rw_owner));
2258 data -= LCK_RW_SHARED_READER;
2259 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
2260 goto check_waiters;
2261 }
2262 } else { /* if reader count == 0, must be exclusive lock */
2263 if (data & LCK_RW_WANT_UPGRADE) {
2264 data &= ~(LCK_RW_WANT_UPGRADE);
2265 } else {
2266 if (data & LCK_RW_WANT_EXCL) {
2267 data &= ~(LCK_RW_WANT_EXCL);
2268 } else { /* lock is not 'owned', panic */
2269 panic("Releasing non-exclusive RW lock without a reader refcount!");
2270 }
2271 }
2272 if (!once) {
2273 // Only check for holder and clear it once
2274 assertf(lock->lck_rw_owner == current_thread()->ctid,
2275 "state=0x%x, owner=%p", lock->lck_rw_data,
2276 ctid_get_thread_unsafe(lock->lck_rw_owner));
2277 ordered_store_rw_owner(lock, 0);
2278 once = TRUE;
2279 }
2280 check_waiters:
2281 /*
2282 * test the original values to match what
2283 * lck_rw_done_gen is going to do to determine
2284 * which wakeups need to happen...
2285 *
2286 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2287 */
2288 if (prev & LCK_RW_W_WAITING) {
2289 data &= ~(LCK_RW_W_WAITING);
2290 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
2291 data &= ~(LCK_RW_R_WAITING);
2292 }
2293 } else {
2294 data &= ~(LCK_RW_R_WAITING);
2295 }
2296 }
2297 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
2298 break;
2299 }
2300 cpu_pause();
2301 }
2302 return lck_rw_done_gen(lock, prev);
2303 }
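
/*
 * Illustrative usage sketch (the 'obj' object is hypothetical): lck_rw_done()
 * is typically used on a common unlock path that may be reached with the lock
 * held in either mode, when the caller also wants to know which mode it dropped.
 *
 *	lck_rw_type_t mode = lck_rw_done(&obj->lock);
 *
 *	if (mode == LCK_RW_TYPE_EXCLUSIVE) {
 *		// we had been the writer; readers may now proceed
 *	}
 */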
2304
2305 /*!
2306 * @function lck_rw_unlock_shared
2307 *
2308 * @abstract
2309 * Unlocks a rw_lock previously locked in shared mode.
2310 *
2311 * @discussion
2312 * The same thread that locked the lock needs to unlock it.
2313 *
2314 * @param lck rw_lock held in shared mode to unlock.
2315 */
2316 __mockable
2317 void
2318 lck_rw_unlock_shared(
2319 lck_rw_t *lck)
2320 {
2321 lck_rw_type_t ret;
2322
2323 assertf(lck->lck_rw_owner == 0,
2324 "state=0x%x, owner=%p", lck->lck_rw_data,
2325 ctid_get_thread_unsafe(lck->lck_rw_owner));
2326 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
2327 ret = lck_rw_done(lck);
2328
2329 if (ret != LCK_RW_TYPE_SHARED) {
2330 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
2331 }
2332 }
2333
2334 /*!
2335 * @function lck_rw_unlock_exclusive
2336 *
2337 * @abstract
2338 * Unlocks a rw_lock previously locked in exclusive mode.
2339 *
2340 * @discussion
2341 * The same thread that locked the lock needs to unlock it.
2342 *
2343 * @param lck rw_lock held in exclusive mode to unlock.
2344 */
2345 __mockable
2346 void
2347 lck_rw_unlock_exclusive(
2348 lck_rw_t *lck)
2349 {
2350 lck_rw_type_t ret;
2351
2352 assertf(lck->lck_rw_owner == current_thread()->ctid,
2353 "state=0x%x, owner=%p", lck->lck_rw_data,
2354 ctid_get_thread_unsafe(lck->lck_rw_owner));
2355 ret = lck_rw_done(lck);
2356
2357 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
2358 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
2359 }
2360 }
2361
2362 /*!
2363 * @function lck_rw_unlock
2364 *
2365 * @abstract
2366 * Unlocks a rw_lock previously locked with lck_rw_type.
2367 *
2368 * @discussion
2369 * The lock must be unlocked by the same thread it was locked from.
2370 * The type of the lock/unlock have to match, unless an upgrade/downgrade was performed while
2371 * holding the lock.
2372 *
2373 * @param lck rw_lock to unlock.
2374 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2375 */
2376 __mockable
2377 void
2378 lck_rw_unlock(
2379 lck_rw_t *lck,
2380 lck_rw_type_t lck_rw_type)
2381 {
2382 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2383 lck_rw_unlock_shared(lck);
2384 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2385 lck_rw_unlock_exclusive(lck);
2386 } else {
2387 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
2388 }
2389 }
2390
2391 /*!
2392 * @function lck_rw_assert
2393 *
2394 * @abstract
2395 * Asserts the rw_lock is held.
2396 *
2397 * @discussion
2398 * read-write locks do not have a concept of ownership when held in shared mode,
2399 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
2400 * However, if DEBUG_RW is enabled, a best-effort mechanism to track the owners is in place, and
2401 * this function can be more accurate.
2402 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD,
2403 * or LCK_RW_ASSERT_NOTHELD.
2404 *
2405 * @param lck rw_lock to check.
2406 * @param type assert type
2407 */
2408 __mockable
2409 void
2410 lck_rw_assert(
2411 lck_rw_t *lck,
2412 unsigned int type)
2413 {
2414 thread_t thread = current_thread();
2415
2416 switch (type) {
2417 case LCK_RW_ASSERT_SHARED:
2418 if ((lck->lck_rw_shared_count != 0) &&
2419 (lck->lck_rw_owner == 0)) {
2420 #if DEBUG_RW
2421 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2422 #endif /* DEBUG_RW */
2423 return;
2424 }
2425 break;
2426 case LCK_RW_ASSERT_EXCLUSIVE:
2427 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2428 (lck->lck_rw_shared_count == 0) &&
2429 (lck->lck_rw_owner == thread->ctid)) {
2430 #if DEBUG_RW
2431 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2432 #endif /* DEBUG_RW */
2433 return;
2434 }
2435 break;
2436 case LCK_RW_ASSERT_HELD:
2437 if (lck->lck_rw_shared_count != 0) {
2438 #if DEBUG_RW
2439 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2440 #endif /* DEBUG_RW */
2441 return; // Held shared
2442 }
2443 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2444 (lck->lck_rw_owner == thread->ctid)) {
2445 #if DEBUG_RW
2446 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2447 #endif /* DEBUG_RW */
2448 return; // Held exclusive
2449 }
2450 break;
2451 case LCK_RW_ASSERT_NOTHELD:
2452 if ((lck->lck_rw_shared_count == 0) &&
2453 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2454 (lck->lck_rw_owner == 0)) {
2455 #ifdef DEBUG_RW
2456 assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2457 #endif /* DEBUG_RW */
2458 return;
2459 }
2460 break;
2461 default:
2462 break;
2463 }
2464 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2465 }
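
/*
 * Illustrative usage sketch (the 'vnode_like' structure and its fields are
 * hypothetical): internal helpers commonly document their locking contract by
 * asserting it on entry, panicking on violation rather than silently
 * corrupting state.
 *
 *	static void
 *	update_fields(struct vnode_like *vp)
 *	{
 *		lck_rw_assert(&vp->v_lock, LCK_RW_ASSERT_EXCLUSIVE);
 *		vp->v_flags |= V_DIRTY;        // safe: we are the exclusive holder
 *	}
 */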
2466
2467 /*!
2468 * @function kdp_lck_rw_lock_is_acquired_exclusive
2469 *
2470 * @abstract
2471 * Checks if a rw_lock is held exclusively.
2472 *
2473 * @discussion
2474 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2475 *
2476 * @param lck lock to check
2477 *
2478 * @returns TRUE if the lock is held exclusively
2479 */
2480 boolean_t
2481 kdp_lck_rw_lock_is_acquired_exclusive(
2482 lck_rw_t *lck)
2483 {
2484 if (not_in_kdp) {
2485 panic("panic: rw lock exclusive check done outside of kernel debugger");
2486 }
2487 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2488 }
2489
2490 void
2491 kdp_rwlck_find_owner(
2492 __unused struct waitq *waitq,
2493 event64_t event,
2494 thread_waitinfo_t *waitinfo)
2495 {
2496 lck_rw_t *rwlck = NULL;
2497 switch (waitinfo->wait_type) {
2498 case kThreadWaitKernelRWLockRead:
2499 rwlck = READ_EVENT_TO_RWLOCK(event);
2500 break;
2501 case kThreadWaitKernelRWLockWrite:
2502 case kThreadWaitKernelRWLockUpgrade:
2503 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2504 break;
2505 default:
2506 panic("%s was called with an invalid blocking type", __FUNCTION__);
2507 break;
2508 }
2509 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2510 waitinfo->owner = thread_tid(ctid_get_thread(rwlck->lck_rw_owner));
2511 }
2512
2513 /*!
2514 * @function lck_rw_lock_would_yield_shared
2515 *
2516 * @abstract
2517 * Check whether a rw_lock currently held in shared mode would be yielded
2518 *
2519 * @discussion
2520 * This function can be used when lck_rw_lock_yield_shared() would be
2521 * inappropriate due to the need to perform additional housekeeping
2522 * prior to any yield or when the caller may wish to prematurely terminate
2523 * an operation rather than resume it after regaining the lock.
2524 *
2525 * @param lck rw_lock already held in shared mode to yield.
2526 *
2527 * @returns TRUE if the lock would yield, FALSE otherwise
2528 */
2529 __mockable
2530 bool
2531 lck_rw_lock_would_yield_shared(
2532 lck_rw_t *lck)
2533 {
2534 lck_rw_word_t word;
2535
2536 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2537
2538 word.data = ordered_load_rw(lck);
2539 if (word.want_excl || word.want_upgrade) {
2540 return true;
2541 }
2542
2543 return false;
2544 }
2545
2546 /*!
2547 * @function lck_rw_lock_yield_shared
2548 *
2549 * @abstract
2550 * Yields a rw_lock held in shared mode.
2551 *
2552 * @discussion
2553 * This function can block.
2554 * Yields the lock in case there are writers waiting.
2555 * The yield will unlock, block, and re-lock the lock in shared mode.
2556 *
2557 * @param lck rw_lock already held in shared mode to yield.
2558 * @param force_yield if set to true it will always yield irrespective of the lock status
2559 *
2560 * @returns TRUE if the lock was yielded, FALSE otherwise
2561 */
2562 bool
2563 lck_rw_lock_yield_shared(
2564 lck_rw_t *lck,
2565 boolean_t force_yield)
2566 {
2567 if (lck_rw_lock_would_yield_shared(lck) || force_yield) {
2568 lck_rw_unlock_shared(lck);
2569 mutex_pause(2);
2570 lck_rw_lock_shared(lck);
2571 return true;
2572 }
2573
2574 return false;
2575 }
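
/*
 * Illustrative usage sketch (the list, its element type and the process()/
 * revalidate_position() helpers are hypothetical): long scans under a shared
 * hold should periodically offer the lock to waiting writers. Because a yield
 * drops and re-takes the lock, the iteration must tolerate restarting from a
 * safe point after it.
 *
 *	lck_rw_lock_shared(&list->lock);
 *	for (elem = first_element(list); elem != NULL; elem = next_element(list, elem)) {
 *		process(elem);
 *		if (lck_rw_lock_yield_shared(&list->lock, FALSE)) {
 *			// the lock was dropped and re-acquired: cached state is stale
 *			elem = revalidate_position(list, elem);
 *		}
 *	}
 *	lck_rw_unlock_shared(&list->lock);
 */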
2576
2577 /*!
2578 * @function lck_rw_lock_would_yield_exclusive
2579 *
2580 * @abstract
2581 * Check whether a rw_lock currently held in exclusive mode would be yielded
2582 *
2583 * @discussion
2584 * This function can be used when lck_rw_lock_yield_exclusive would be
2585 * inappropriate due to the need to perform additional housekeeping
2586 * prior to any yield or when the caller may wish to prematurely terminate
2587 * an operation rather than resume it after regaining the lock.
2588 *
2589 * @param lck rw_lock already held in exclusive mode to yield.
2590 * @param mode when to yield.
2591 *
2592 * @returns TRUE if the lock would yield, FALSE otherwise
2593 */
2594 __mockable
2595 bool
2596 lck_rw_lock_would_yield_exclusive(
2597 lck_rw_t *lck,
2598 lck_rw_yield_t mode)
2599 {
2600 lck_rw_word_t word;
2601 bool yield = false;
2602
2603 lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);
2604
2605 if (mode == LCK_RW_YIELD_ALWAYS) {
2606 yield = true;
2607 } else {
2608 word.data = ordered_load_rw(lck);
2609 if (word.w_waiting) {
2610 yield = true;
2611 } else if (mode == LCK_RW_YIELD_ANY_WAITER) {
2612 yield = (word.r_waiting != 0);
2613 }
2614 }
2615
2616 return yield;
2617 }
2618
2619 /*!
2620 * @function lck_rw_lock_yield_exclusive
2621 *
2622 * @abstract
2623 * Yields a rw_lock held in exclusive mode.
2624 *
2625 * @discussion
2626 * This function can block.
2627 * Yields the lock in case there are writers waiting.
2628 * The yield will unlock, block, and re-lock the lock in exclusive mode.
2629 *
2630 * @param lck rw_lock already held in exclusive mode to yield.
2631 * @param mode when to yield.
2632 *
2633 * @returns TRUE if the lock was yielded, FALSE otherwise
2634 */
2635 bool
2636 lck_rw_lock_yield_exclusive(
2637 lck_rw_t *lck,
2638 lck_rw_yield_t mode)
2639 {
2640 bool yield = lck_rw_lock_would_yield_exclusive(lck, mode);
2641
2642 if (yield) {
2643 lck_rw_unlock_exclusive(lck);
2644 mutex_pause(2);
2645 lck_rw_lock_exclusive(lck);
2646 }
2647
2648 return yield;
2649 }
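
/*
 * Illustrative usage sketch (the 'map' object and the purge_one_entry() helper
 * are hypothetical): a long-running writer can break its work into batches and
 * yield between them whenever any other thread is queued on the lock.
 *
 *	lck_rw_lock_exclusive(&map->lock);
 *	while (purge_one_entry(map)) {
 *		(void)lck_rw_lock_yield_exclusive(&map->lock, LCK_RW_YIELD_ANY_WAITER);
 *	}
 *	lck_rw_unlock_exclusive(&map->lock);
 */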
2650
2651 /*!
2652 * @function lck_rw_sleep
2653 *
2654 * @abstract
2655 * Assert_wait on an event while holding the rw_lock.
2656 *
2657 * @discussion
2658 * The lock is dropped for the duration of the wait. The flags decide how to re-acquire the lock upon wake up
2659 * (LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK to leave it unlocked)
2660 * and whether the priority needs to be kept boosted until the lock is
2661 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2662 *
2663 * @param lck rw_lock to use to synch the assert_wait.
2664 * @param lck_sleep_action flags.
2665 * @param event event to assert_wait on.
2666 * @param interruptible wait type.
2667 */
2668 wait_result_t
2669 lck_rw_sleep(
2670 lck_rw_t *lck,
2671 lck_sleep_action_t lck_sleep_action,
2672 event_t event,
2673 wait_interrupt_t interruptible)
2674 {
2675 wait_result_t res;
2676 lck_rw_type_t lck_rw_type;
2677 thread_pri_floor_t token;
2678
2679 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2680 panic("Invalid lock sleep action %x", lck_sleep_action);
2681 }
2682
2683 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2684 /*
2685 * Although we are dropping the RW lock, the intent in most cases
2686 * is that this thread remains as an observer, since it may hold
2687 * some secondary resource, but must yield to avoid deadlock. In
2688 * this situation, make sure that the thread is boosted to the
2689 * ceiling while blocked, so that it can re-acquire the
2690 * RW lock at that priority.
2691 */
2692 token = thread_priority_floor_start();
2693 }
2694
2695 res = assert_wait(event, interruptible);
2696 if (res == THREAD_WAITING) {
2697 lck_rw_type = lck_rw_done(lck);
2698 res = thread_block(THREAD_CONTINUE_NULL);
2699 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2700 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2701 lck_rw_lock(lck, lck_rw_type);
2702 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2703 lck_rw_lock_exclusive(lck);
2704 } else {
2705 lck_rw_lock_shared(lck);
2706 }
2707 }
2708 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2709 (void)lck_rw_done(lck);
2710 }
2711
2712 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2713 thread_priority_floor_end(&token);
2714 }
2715
2716 return res;
2717 }
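
/*
 * Illustrative usage sketch (the 'queue' object, its 'q_nonempty' field and
 * the dequeue_work() helper are hypothetical): the classic pattern re-checks
 * the condition in a loop, since the lock is dropped while sleeping and the
 * state may have changed by the time the thread re-acquires it.
 *
 *	lck_rw_lock_exclusive(&queue->lock);
 *	while (!queue->q_nonempty) {
 *		(void)lck_rw_sleep(&queue->lock, LCK_SLEEP_EXCLUSIVE,
 *		    (event_t)&queue->q_nonempty, THREAD_UNINT);
 *	}
 *	dequeue_work(queue);
 *	lck_rw_unlock_exclusive(&queue->lock);
 *
 * The waking side would set q_nonempty under the lock and then call
 * thread_wakeup((event_t)&queue->q_nonempty).
 */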
2718
2719 /*!
2720 * @function lck_rw_sleep_deadline
2721 *
2722 * @abstract
2723 * Assert_wait_deadline on an event while holding the rw_lock.
2724 *
2725 * @discussion
2726 * The lock is dropped for the duration of the wait. The flags decide how to re-acquire the lock upon wake up
2727 * (LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK to leave it unlocked)
2728 * and whether the priority needs to be kept boosted until the lock is
2729 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2730 *
2731 * @param lck rw_lock to use to synch the assert_wait.
2732 * @param lck_sleep_action flags.
2733 * @param event event to assert_wait on.
2734 * @param interruptible wait type.
2735 * @param deadline absolute time at which the wait times out if no wakeup occurs earlier
2736 */
2737 wait_result_t
2738 lck_rw_sleep_deadline(
2739 lck_rw_t *lck,
2740 lck_sleep_action_t lck_sleep_action,
2741 event_t event,
2742 wait_interrupt_t interruptible,
2743 uint64_t deadline)
2744 {
2745 wait_result_t res;
2746 lck_rw_type_t lck_rw_type;
2747 thread_pri_floor_t token;
2748
2749 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2750 panic("Invalid lock sleep action %x", lck_sleep_action);
2751 }
2752
2753 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2754 token = thread_priority_floor_start();
2755 }
2756
2757 res = assert_wait_deadline(event, interruptible, deadline);
2758 if (res == THREAD_WAITING) {
2759 lck_rw_type = lck_rw_done(lck);
2760 res = thread_block(THREAD_CONTINUE_NULL);
2761 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2762 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2763 lck_rw_lock(lck, lck_rw_type);
2764 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2765 lck_rw_lock_exclusive(lck);
2766 } else {
2767 lck_rw_lock_shared(lck);
2768 }
2769 }
2770 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2771 (void)lck_rw_done(lck);
2772 }
2773
2774 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2775 thread_priority_floor_end(&token);
2776 }
2777
2778 return res;
2779 }
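
/*
 * Illustrative usage sketch (the 'dev' object, its 'd_ready' field and the
 * 100ms interval are hypothetical): the deadline variant bounds the wait and
 * lets the caller distinguish a timeout from a genuine wakeup.
 *
 *	uint64_t deadline;
 *	wait_result_t wr;
 *
 *	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
 *
 *	lck_rw_lock_shared(&dev->lock);
 *	while (!dev->d_ready) {
 *		wr = lck_rw_sleep_deadline(&dev->lock, LCK_SLEEP_SHARED,
 *		    (event_t)&dev->d_ready, THREAD_UNINT, deadline);
 *		if (wr == THREAD_TIMED_OUT) {
 *			break;                     // give up after the timeout
 *		}
 *	}
 *	lck_rw_unlock_shared(&dev->lock);
 */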
2780
2781 /*
2782 * Reader-writer lock promotion
2783 *
2784 * We support a limited form of reader-writer
2785 * lock promotion whose effects are:
2786 *
2787 * * Qualifying threads have decay disabled
2788 * * Scheduler priority is reset to a floor of
2789 * their statically assigned priority
2790 * or MINPRI_RWLOCK
2791 *
2792 * The rationale is that lck_rw_ts do not have
2793 * a single owner, so we cannot apply a directed
2794 * priority boost from all waiting threads
2795 * to all holding threads without maintaining
2796 * lists of all shared owners and all waiting
2797 * threads for every lock.
2798 *
2799 * Instead (and to preserve the uncontended fast-
2800 * path), acquiring (or attempting to acquire)
2801 * a RW lock in shared or exclusive lock increments
2802 * a per-thread counter. Only if that thread stops
2803 * making forward progress (for instance blocking
2804 * on a mutex, or being preempted) do we consult
2805 * the counter and apply the priority floor.
2806 * When the thread becomes runnable again (or in
2807 * the case of preemption it never stopped being
2808 * runnable), it has the priority boost and should
2809 * be in a good position to run on the CPU and
2810 * release all RW locks (at which point the priority
2811 * boost is cleared).
2812 *
2813 * Care must be taken to ensure that priority
2814 * boosts are not retained indefinitely, since unlike
2815 * mutex priority boosts (where the boost is tied
2816 * to the mutex lifecycle), the boost is tied
2817 * to the thread and independent of any particular
2818 * lck_rw_t. Assertions are in place on return
2819 * to userspace so that the boost is not held
2820 * indefinitely.
2821 *
2822 * The routines that increment/decrement the
2823 * per-thread counter should err on the side of
2824 * incrementing any time a preemption is possible
2825 * and the lock would be visible to the rest of the
2826 * system as held (so it should be incremented before
2827 * interlocks are dropped/preemption is enabled, or
2828 * before a CAS is executed to acquire the lock).
2829 *
2830 */
2831
2832 /*!
2833 * @function lck_rw_clear_promotion
2834 *
2835 * @abstract
2836 * Undo priority promotions when the last rw_lock
2837 * is released by a thread (if a promotion was active).
2838 *
2839 * @param thread thread to demote.
2840 * @param lock object reason for the demotion.
2841 */
2842 __attribute__((noinline))
2843 static void
2844 lck_rw_clear_promotion(thread_t thread, const void *lock)
2845 {
2846 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
2847 spl_t s = splsched();
2848 thread_lock(thread);
2849
2850 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
2851 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED,
2852 unslide_for_kdebug(lock));
2853 }
2854
2855 thread_unlock(thread);
2856 splx(s);
2857 }
2858
2859 /*!
2860 * @function lck_rw_set_promotion_locked
2861 *
2862 * @abstract
2863 * Callout from context switch if the thread goes
2864 * off core with a positive rwlock_count.
2865 *
2866 * @discussion
2867 * Called at splsched with the thread locked.
2868 *
2869 * @param thread thread to promote.
2870 */
2871 __attribute__((always_inline))
2872 void
2873 lck_rw_set_promotion_locked(thread_t thread)
2874 {
2875 if (LcksOpts & LCK_OPTION_DISABLE_RW_PRIO) {
2876 return;
2877 }
2878
2879 assert(thread->rwlock_count > 0);
2880
2881 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2882 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
2883 }
2884 }
2885
2886 __attribute__((always_inline))
2887 void
2888 lck_rw_lock_count_inc(thread_t thread, const void *lock __unused)
2889 {
2890 if (thread->rwlock_count++ == 0) {
2891 #if MACH_ASSERT
2892 /*
2893 * Set the ast to check that the
2894 * rwlock_count is going to be set to zero when
2895 * going back to userspace.
2896 * Set it only once when we increment it for the first time.
2897 */
2898 act_set_debug_assert();
2899 #endif
2900 }
2901 }
2902
2903 __abortlike
2904 static void
2905 __lck_rw_lock_count_dec_panic(thread_t thread)
2906 {
2907 panic("rw lock count underflow for thread %p", thread);
2908 }
2909
2910 __attribute__((always_inline))
2911 void
2912 lck_rw_lock_count_dec(thread_t thread, const void *lock)
2913 {
2914 uint32_t rwlock_count = thread->rwlock_count--;
2915
2916 if (rwlock_count == 0) {
2917 __lck_rw_lock_count_dec_panic(thread);
2918 }
2919
2920 if (__probable(rwlock_count == 1)) {
2921 /* sched_flags checked without lock, but will be rechecked while clearing */
2922 if (__improbable(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2923 lck_rw_clear_promotion(thread, lock);
2924 }
2925 }
2926 }
2927