1 /*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 #define LOCK_PRIVATE 1
57 #include <debug.h>
58 #include <kern/locks_internal.h>
59 #include <kern/lock_stat.h>
60 #include <kern/locks.h>
61 #include <kern/zalloc.h>
62 #include <kern/thread.h>
63 #include <kern/processor.h>
64 #include <kern/sched_prim.h>
65 #include <kern/debug.h>
66 #include <machine/atomic.h>
67 #include <machine/machine_cpu.h>
68
69 KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);
70
71 #define LCK_RW_WRITER_EVENT(lck) (event_t)((uintptr_t)(lck)+1)
72 #define LCK_RW_READER_EVENT(lck) (event_t)((uintptr_t)(lck)+2)
73 #define WRITE_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-1))
74 #define READ_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-2))
75
76 #if CONFIG_DTRACE
77 #define DTRACE_RW_SHARED 0x0 //reader
78 #define DTRACE_RW_EXCL 0x1 //writer
79 #define DTRACE_NO_FLAG 0x0 //not applicable
80 #endif /* CONFIG_DTRACE */
81
82 #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
83 #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
84 #define LCK_RW_LCK_SHARED_CODE 0x102
85 #define LCK_RW_LCK_SH_TO_EX_CODE 0x103
86 #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
87 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105
88
89 #if __x86_64__
90 #define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
91 #define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
92 #define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
93 #define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
94 #define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
95 #define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
96 #define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
97 #define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
98 #endif
99
100 #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
101 #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
102
103 #define ordered_load_rw(lock) os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
104 #define ordered_store_rw(lock, value) os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
105 #define ordered_store_rw_owner(lock, value) os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)
106
107 #ifdef DEBUG_RW
108
109 STATIC_IF_KEY_DEFINE_TRUE(lck_rw_assert);
110
111 static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
112 SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
113 VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
114
115 #define set_rwlde_caller_packed(entry, caller) ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)caller, LCK_RW_CALLER_PACKED))
116 #define get_rwlde_caller(entry) ((void*)VM_UNPACK_POINTER(entry->rwlde_caller_packed, LCK_RW_CALLER_PACKED))
117
118 #endif /* DEBUG_RW */
119
120 /*!
121 * @function lck_rw_alloc_init
122 *
123 * @abstract
124 * Allocates and initializes a lck_rw_t.
125 *
126 * @discussion
127 * The function can block. See lck_rw_init() for initialization details.
128 *
129 * @param grp lock group to associate with the lock.
130 * @param attr lock attribute to initialize the lock.
131 *
132 * @returns NULL or the allocated lock
133 */
134 lck_rw_t *
135 lck_rw_alloc_init(
136 lck_grp_t *grp,
137 lck_attr_t *attr)
138 {
139 lck_rw_t *lck;
140
141 lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
142 lck_rw_init(lck, grp, attr);
143 return lck;
144 }
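/*
 * Illustrative usage sketch (not part of the build): a typical
 * allocate / lock / unlock / free cycle.  "my_grp" is a hypothetical
 * lock group assumed to have been created elsewhere (for example with
 * lck_grp_alloc_init()).
 *
 *	lck_rw_t *lock = lck_rw_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *	lck_rw_lock_exclusive(lock);
 *	... exclusive critical section ...
 *	lck_rw_unlock_exclusive(lock);
 *
 *	lck_rw_free(lock, my_grp);
 */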
145
146 /*!
147 * @function lck_rw_init
148 *
149 * @abstract
150 * Initializes a lck_rw_t.
151 *
152 * @discussion
153 * Usage statistics for the lock will be added to the lock group provided.
154 *
155 * The lock attribute can be used to specify the lock contention behaviour.
156 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to RW_WRITER_PRIORITY)
157 * and lck_attr_rw_shared_priority() can be used to set the behaviour to RW_SHARED_PRIORITY.
158 *
159 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the readers;
160 * if the lock is held and a writer starts waiting for the lock, readers will not be able
161 * to acquire the lock until all writers stop contending. Readers could
162 * potentially starve.
163 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the writers:
164 * unless the lock is held in exclusive mode, readers will always be able to acquire the lock.
165 * Readers can lock a shared lock even if there are writers waiting. Writers could potentially
166 * starve.
167 *
168 * @param lck lock to initialize.
169 * @param grp lock group to associate with the lock.
170 * @param attr lock attribute to initialize the lock.
171 *
172 */
173 void
174 lck_rw_init(
175 lck_rw_t *lck,
176 lck_grp_t *grp,
177 lck_attr_t *attr)
178 {
179 /* keep this so that the lck_type_t type is referenced for lldb */
180 lck_type_t type = LCK_TYPE_RW;
181
182 if (attr == LCK_ATTR_NULL) {
183 attr = &lck_attr_default;
184 }
185 *lck = (lck_rw_t){
186 .lck_rw_type = type,
187 .lck_rw_can_sleep = true,
188 .lck_rw_priv_excl = !(attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY),
189 };
190 lck_grp_reference(grp, &grp->lck_grp_rwcnt);
191 }
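/*
 * Illustrative sketch (not part of the build): initializing an embedded
 * lock with reader priority.  "my_grp" and "my_object" are hypothetical;
 * lck_attr_alloc_init()/lck_attr_free() are the usual attribute helpers
 * and lck_attr_rw_shared_priority() is the setter mentioned above.
 *
 *	lck_attr_t *attr = lck_attr_alloc_init();
 *	lck_attr_rw_shared_priority(attr);	    (RW_SHARED_PRIORITY behaviour)
 *	lck_rw_init(&my_object->obj_lock, my_grp, attr);
 *	lck_attr_free(attr);
 *
 * Passing LCK_ATTR_NULL instead keeps the default RW_WRITER_PRIORITY
 * behaviour.
 */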
192
193 /*!
194 * @function lck_rw_free
195 *
196 * @abstract
197 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
198 *
199 * @discussion
200 * The lock must not be held by any thread.
201 *
202 * @param lck rw_lock to free.
203 */
204 void
205 lck_rw_free(
206 lck_rw_t *lck,
207 lck_grp_t *grp)
208 {
209 lck_rw_destroy(lck, grp);
210 zfree(KT_LCK_RW, lck);
211 }
212
213 /*!
214 * @function lck_rw_destroy
215 *
216 * @abstract
217 * Destroys a rw_lock previously initialized with lck_rw_init().
218 *
219 * @discussion
220 * The lock must not be held by any thread.
221 *
222 * @param lck rw_lock to destroy.
223 */
224 void
225 lck_rw_destroy(
226 lck_rw_t *lck,
227 lck_grp_t *grp)
228 {
229 if (lck->lck_rw_type != LCK_TYPE_RW ||
230 lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
231 panic("Destroying previously destroyed lock %p", lck);
232 }
233 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
234
235 lck->lck_rw_type = LCK_TYPE_NONE;
236 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
237 lck_grp_deallocate(grp, &grp->lck_grp_rwcnt);
238 }
239
240 #ifdef DEBUG_RW
241
242 /*
243 * Best effort mechanism to debug rw_locks.
244 *
245 * This mechanism is in addition to the owner checks. The owner is set
246 * only when the lock is held in exclusive mode so the checks do not cover
247 * the cases in which the lock is held in shared mode.
248 *
249 * This mechanism tentatively stores the rw_lock acquired and its debug
250 * information on the thread struct.
251 * At most LCK_RW_EXPECTED_MAX_NUMBER entries of rw_lock debug information can be stored.
252 *
253 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
254 * at the same time. If a thread holds more than this number of rw_locks we
255 * will start losing debug information.
256 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability we will
257 * store the debug information but it will require more memory per thread
258 * and longer lock/unlock time.
259 *
260 * If an empty slot is found for the debug information, we record the lock;
261 * otherwise we set the overflow flag.
262 *
263 * If we reached the overflow threshold, we might stop asserting because we can
264 * no longer be sure whether the lock was acquired or not.
265 *
266 * Even if we reached the overflow threshold, we try to store the debug information
267 * for the new locks acquired. This can be useful in core dumps to debug
268 * possible return to userspace without unlocking and to find possible readers
269 * holding the lock.
270 */
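/*
 * Summary of the per-entry encoding used by the helpers below (derived
 * from the add/change/remove functions in this file):
 *
 *	rwlde_lock == NULL		slot is free
 *	rwlde_mode_count == -1		lock held in exclusive mode
 *	rwlde_mode_count == N > 0	lock held in shared mode N times
 *					(recursive shared holds)
 */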
271 #if DEBUG_RW
272
273 __static_if_init_func
274 void
275 lck_rw_assert_init(const char *args, uint64_t kf_ovrd)
276 {
277 bool lck_rw_assert_disable = false;
278
279 if (kf_ovrd & KF_MACH_ASSERT_OVRD) {
280 lck_rw_assert_disable = true;
281 }
282
283 if (static_if_boot_arg_uint64(args, "lcks", 0) &
284 LCK_OPTION_DISABLE_RW_DEBUG) {
285 lck_rw_assert_disable = true;
286 }
287
288 if (lck_rw_assert_disable) {
289 static_if_key_disable(lck_rw_assert);
290 }
291 }
292
293 #endif /* DEBUG_RW */
294
295 static inline struct rw_lock_debug_entry *
296 find_lock_in_savedlocks(lck_rw_t* lock, rw_lock_debug_t *rw_locks_held)
297 {
298 int i;
299 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
300 struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
301 if (existing->rwlde_lock == lock) {
302 return existing;
303 }
304 }
305
306 return NULL;
307 }
308
309 __abortlike
310 static void
311 rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
312 {
313 panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
314 }
315
316 static inline struct rw_lock_debug_entry *
317 find_empty_slot(rw_lock_debug_t *rw_locks_held)
318 {
319 int i;
320 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
321 struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
322 if (entry->rwlde_lock == NULL) {
323 return entry;
324 }
325 }
326 rwlock_slot_panic(rw_locks_held);
327 }
328
329 __abortlike
330 static void
331 canlock_rwlock_panic(lck_rw_t* lock, thread_t thread, struct rw_lock_debug_entry *entry)
332 {
333 panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
334 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
335 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
336 }
337
338 __attribute__((noinline))
339 static void
340 assert_canlock_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
341 {
342 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
343 if (__probable(rw_locks_held->rwld_locks_acquired == 0)) {
344 //no locks saved, safe to lock
345 return;
346 }
347
348 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
349 if (__improbable(entry != NULL)) {
350 boolean_t can_be_shared_recursive;
351 if (lck_rw_recursive_shared_assert_74048094) {
352 can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
353 } else {
354 /* currently lck_rw_lock_shared is called recursively;
355 * until the code is fixed, allow locking
356 * recursively in shared mode
357 */
358 can_be_shared_recursive = TRUE;
359 }
360 if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
361 return;
362 }
363 canlock_rwlock_panic(lock, thread, entry);
364 }
365 }
366
367 static inline void
368 assert_canlock_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
369 {
370 if (lck_rw_assert_enabled()) {
371 assert_canlock_rwlock_slow(lock, thread, type);
372 }
373 }
374
375 __abortlike
376 static void
377 held_rwlock_notheld_panic(lck_rw_t* lock, thread_t thread)
378 {
379 panic("RW lock %p not held by %p", lock, thread);
380 }
381
382 __abortlike
383 static void
384 held_rwlock_notheld_with_info_panic(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
385 {
386 if (type == LCK_RW_TYPE_EXCLUSIVE) {
387 panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
388 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
389 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
390 } else {
391 panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
392 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
393 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
394 }
395 }
396
397 __attribute__((noinline))
398 static void
399 assert_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
400 {
401 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
402
403 if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
404 if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
405 held_rwlock_notheld_panic(lock, thread);
406 }
407 return;
408 }
409
410 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
411 if (__probable(entry != NULL)) {
412 if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
413 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
414 } else {
415 if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
416 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
417 }
418 }
419 } else {
420 if (rw_locks_held->rwld_overflow == 0) {
421 held_rwlock_notheld_panic(lock, thread);
422 }
423 }
424 }
425
426 static inline void
427 assert_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
428 {
429 if (lck_rw_assert_enabled()) {
430 assert_held_rwlock_slow(lock, thread, type);
431 }
432 }
433
434 __attribute__((noinline))
435 static void
436 change_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
437 {
438 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
439 if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
440 if (rw_locks_held->rwld_overflow == 0) {
441 held_rwlock_notheld_panic(lock, thread);
442 }
443 return;
444 }
445
446 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
447 if (__probable(entry != NULL)) {
448 if (typeFrom == LCK_RW_TYPE_SHARED) {
449 //We are upgrading
450 assertf(entry->rwlde_mode_count == 1,
451 "RW lock %p not held by a single shared when upgrading "
452 "by %p caller %p read %d state 0x%x owner 0x%p ",
453 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
454 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
455 entry->rwlde_mode_count = -1;
456 set_rwlde_caller_packed(entry, caller);
457 } else {
458 //We are downgrading
459 assertf(entry->rwlde_mode_count == -1,
460 "RW lock %p not held in write mode when downgrading "
461 "by %p caller %p read %d state 0x%x owner 0x%p ",
462 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
463 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
464 entry->rwlde_mode_count = 1;
465 set_rwlde_caller_packed(entry, caller);
466 }
467 return;
468 }
469
470 if (rw_locks_held->rwld_overflow == 0) {
471 held_rwlock_notheld_panic(lock, thread);
472 }
473
474 if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
475 //array is full
476 return;
477 }
478
479 struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
480 null_entry->rwlde_lock = lock;
481 set_rwlde_caller_packed(null_entry, caller);
482 if (typeFrom == LCK_RW_TYPE_SHARED) {
483 null_entry->rwlde_mode_count = -1;
484 } else {
485 null_entry->rwlde_mode_count = 1;
486 }
487 rw_locks_held->rwld_locks_saved++;
488 }
489
490 static inline void
491 change_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
492 {
493 if (lck_rw_assert_enabled()) {
494 change_held_rwlock_slow(lock, thread, typeFrom, caller);
495 }
496 }
497
498 __abortlike
499 static void
500 add_held_rwlock_too_many_panic(thread_t thread)
501 {
502 panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
503 }
504
505 static __attribute__((noinline)) void
506 add_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
507 {
508 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
509 struct rw_lock_debug_entry *null_entry;
510 if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
511 add_held_rwlock_too_many_panic(thread);
512 }
513 rw_locks_held->rwld_locks_acquired++;
514
515 if (type == LCK_RW_TYPE_EXCLUSIVE) {
516 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
517 //array is full
518 rw_locks_held->rwld_overflow = 1;
519 return;
520 }
521 null_entry = find_empty_slot(rw_locks_held);
522 null_entry->rwlde_lock = lock;
523 set_rwlde_caller_packed(null_entry, caller);
524 null_entry->rwlde_mode_count = -1;
525 rw_locks_held->rwld_locks_saved++;
526 return;
527 } else {
528 if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
529 //array is empty
530 goto add_shared;
531 }
532
533 boolean_t allow_shared_recursive;
534 if (lck_rw_recursive_shared_assert_74048094) {
535 allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
536 } else {
537 allow_shared_recursive = TRUE;
538 }
539 if (allow_shared_recursive) {
540 //It could be already locked in shared mode
541 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
542 if (entry != NULL) {
543 assert(entry->rwlde_mode_count > 0);
544 assertf(entry->rwlde_mode_count != INT8_MAX,
545 "RW lock %p with too many recursive shared held "
546 "from %p caller %p read %d state 0x%x owner 0x%p",
547 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
548 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
549 entry->rwlde_mode_count += 1;
550 return;
551 }
552 }
553
554 //none of the locks were a match
555 //try to add a new entry
556 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
557 //array is full
558 rw_locks_held->rwld_overflow = 1;
559 return;
560 }
561
562 add_shared:
563 null_entry = find_empty_slot(rw_locks_held);
564 null_entry->rwlde_lock = lock;
565 set_rwlde_caller_packed(null_entry, caller);
566 null_entry->rwlde_mode_count = 1;
567 rw_locks_held->rwld_locks_saved++;
568 }
569 }
570
571 static inline void
572 add_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
573 {
574 if (lck_rw_assert_enabled()) {
575 add_held_rwlock_slow(lock, thread, type, caller);
576 }
577 }
578
579 static void
580 remove_held_rwlock_slow(lck_rw_t *lock, thread_t thread, lck_rw_type_t type)
581 {
582 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
583 if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
584 return;
585 }
586 rw_locks_held->rwld_locks_acquired--;
587
588 if (rw_locks_held->rwld_locks_saved == 0) {
589 assert(rw_locks_held->rwld_overflow == 1);
590 goto out;
591 }
592
593 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
594 if (__probable(entry != NULL)) {
595 if (type == LCK_RW_TYPE_EXCLUSIVE) {
596 assert(entry->rwlde_mode_count == -1);
597 entry->rwlde_mode_count = 0;
598 } else {
599 assert(entry->rwlde_mode_count > 0);
600 entry->rwlde_mode_count--;
601 if (entry->rwlde_mode_count > 0) {
602 goto out;
603 }
604 }
605 entry->rwlde_caller_packed = 0;
606 entry->rwlde_lock = NULL;
607 rw_locks_held->rwld_locks_saved--;
608 } else {
609 assert(rw_locks_held->rwld_overflow == 1);
610 }
611
612 out:
613 if (rw_locks_held->rwld_locks_acquired == 0) {
614 rw_locks_held->rwld_overflow = 0;
615 }
616 return;
617 }
618
619 static inline void
620 remove_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
621 {
622 if (lck_rw_assert_enabled()) {
623 remove_held_rwlock_slow(lock, thread, type);
624 }
625 }
626 #endif /* DEBUG_RW */
627
628 /*
629 * We disable interrupts while holding the RW interlock to prevent an
630 * interrupt from exacerbating hold time.
631 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
632 */
633 static inline boolean_t
634 lck_interlock_lock(
635 lck_rw_t *lck)
636 {
637 boolean_t istate;
638
639 istate = ml_set_interrupts_enabled(FALSE);
640 lck_rw_ilk_lock(lck);
641 return istate;
642 }
643
644 static inline void
645 lck_interlock_unlock(
646 lck_rw_t *lck,
647 boolean_t istate)
648 {
649 lck_rw_ilk_unlock(lck);
650 ml_set_interrupts_enabled(istate);
651 }
652
653 /*
654 * compute the deadline to spin against when
655 * waiting for a change of state on a lck_rw_t
656 */
657 static inline uint64_t
658 lck_rw_deadline_for_spin(
659 lck_rw_t *lck)
660 {
661 lck_rw_word_t word;
662
663 word.data = ordered_load_rw(lck);
664 if (word.can_sleep) {
665 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
666 /*
667 * there are already threads waiting on this lock... this
668 * implies that they have spun beyond their deadlines waiting for
669 * the desired state to show up so we will not bother spinning at this time...
670 * or
671 * the current number of threads sharing this lock exceeds our capacity to run them
672 * concurrently and since all states we're going to spin for require the rw_shared_count
673 * to be at 0, we'll not bother spinning since the latency for this to happen is
674 * unpredictable...
675 */
676 return mach_absolute_time();
677 }
678 return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
679 } else {
680 return mach_absolute_time() + (100000LL * 1000000000LL);
681 }
682 }
683
684 /*
685 * This inline is used when busy-waiting for an rw lock.
686 * If interrupts were disabled when the lock primitive was called,
687 * we poll the IPI handler for pending TLB flushes on x86.
688 */
689 static inline void
690 lck_rw_lock_pause(
691 boolean_t interrupts_enabled)
692 {
693 #if X86_64
694 if (!interrupts_enabled) {
695 handle_pending_TLB_flushes();
696 }
697 cpu_pause();
698 #else
699 (void) interrupts_enabled;
700 wait_for_event();
701 #endif
702 }
703
704 typedef enum __enum_closed {
705 LCK_RW_DRAIN_S_DRAINED = 0,
706 LCK_RW_DRAIN_S_NOT_DRAINED = 1,
707 LCK_RW_DRAIN_S_EARLY_RETURN = 2,
708 LCK_RW_DRAIN_S_TIMED_OUT = 3,
709 } lck_rw_drain_state_t;
710
711 static lck_rw_drain_state_t
712 lck_rw_drain_status(
713 lck_rw_t *lock,
714 uint32_t status_mask,
715 boolean_t wait,
716 bool (^lock_pause)(void))
717 {
718 uint64_t deadline = 0;
719 uint32_t data;
720 boolean_t istate = FALSE;
721
722 if (wait) {
723 deadline = lck_rw_deadline_for_spin(lock);
724 #if __x86_64__
725 istate = ml_get_interrupts_enabled();
726 #endif
727 }
728
729 for (;;) {
730 #if __x86_64__
731 data = os_atomic_load(&lock->lck_rw_data, relaxed);
732 #else
733 data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
734 #endif
735 if ((data & status_mask) == 0) {
736 atomic_exchange_abort();
737 return LCK_RW_DRAIN_S_DRAINED;
738 }
739
740 if (!wait) {
741 atomic_exchange_abort();
742 return LCK_RW_DRAIN_S_NOT_DRAINED;
743 }
744
745 lck_rw_lock_pause(istate);
746
747 if (mach_absolute_time() >= deadline) {
748 return LCK_RW_DRAIN_S_TIMED_OUT;
749 }
750
751 if (lock_pause && lock_pause()) {
752 return LCK_RW_DRAIN_S_EARLY_RETURN;
753 }
754 }
755 }
756
757 /*
758 * Spin while interlock is held.
759 */
760 static inline void
761 lck_rw_interlock_spin(
762 lck_rw_t *lock)
763 {
764 uint32_t data, prev;
765
766 for (;;) {
767 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
768 if (data & LCK_RW_INTERLOCK) {
769 #if __x86_64__
770 cpu_pause();
771 #else
772 wait_for_event();
773 #endif
774 } else {
775 atomic_exchange_abort();
776 return;
777 }
778 }
779 }
780
781 #define LCK_RW_GRAB_WANT 0
782 #define LCK_RW_GRAB_SHARED 1
783
784 typedef enum __enum_closed __enum_options {
785 LCK_RW_GRAB_F_SHARED = 0x0, // Not really a flag obviously but makes call sites more readable.
786 LCK_RW_GRAB_F_WANT_EXCL = 0x1,
787 LCK_RW_GRAB_F_WAIT = 0x2,
788 } lck_rw_grab_flags_t;
789
790 typedef enum __enum_closed {
791 LCK_RW_GRAB_S_NOT_LOCKED = 0,
792 LCK_RW_GRAB_S_LOCKED = 1,
793 LCK_RW_GRAB_S_EARLY_RETURN = 2,
794 LCK_RW_GRAB_S_TIMED_OUT = 3,
795 } lck_rw_grab_state_t;
796
797 static lck_rw_grab_state_t
798 lck_rw_grab(
799 lck_rw_t *lock,
800 lck_rw_grab_flags_t flags,
801 bool (^lock_pause)(void))
802 {
803 uint64_t deadline = 0;
804 uint32_t data, prev;
805 boolean_t do_exch, istate = FALSE;
806
807 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
808
809 if ((flags & LCK_RW_GRAB_F_WAIT) != 0) {
810 deadline = lck_rw_deadline_for_spin(lock);
811 #if __x86_64__
812 istate = ml_get_interrupts_enabled();
813 #endif
814 }
815
816 for (;;) {
817 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
818 if (data & LCK_RW_INTERLOCK) {
819 atomic_exchange_abort();
820 lck_rw_interlock_spin(lock);
821 continue;
822 }
823 do_exch = FALSE;
824 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
825 if ((data & LCK_RW_WANT_EXCL) == 0) {
826 data |= LCK_RW_WANT_EXCL;
827 do_exch = TRUE;
828 }
829 } else { // LCK_RW_GRAB_F_SHARED
830 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
831 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
832 data += LCK_RW_SHARED_READER;
833 do_exch = TRUE;
834 }
835 }
836 if (do_exch) {
837 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
838 return LCK_RW_GRAB_S_LOCKED;
839 }
840 } else {
841 if ((flags & LCK_RW_GRAB_F_WAIT) == 0) {
842 atomic_exchange_abort();
843 return LCK_RW_GRAB_S_NOT_LOCKED;
844 }
845
846 lck_rw_lock_pause(istate);
847
848 if (mach_absolute_time() >= deadline) {
849 return LCK_RW_GRAB_S_TIMED_OUT;
850 }
851 if (lock_pause && lock_pause()) {
852 return LCK_RW_GRAB_S_EARLY_RETURN;
853 }
854 }
855 }
856 }
857
858 /*
859 * The inverse of lck_rw_grab - drops either the LCK_RW_WANT_EXCL bit or
860 * decrements the reader count. Doesn't deal with waking up waiters - i.e.
861 * should only be called when can_sleep is false.
862 */
863 static void
864 lck_rw_drop(lck_rw_t *lock, lck_rw_grab_flags_t flags)
865 {
866 uint32_t data, prev;
867
868 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
869 assert(!lock->lck_rw_can_sleep);
870
871 for (;;) {
872 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
873
874 /* Interlock should never be taken when can_sleep is false. */
875 assert3u(data & LCK_RW_INTERLOCK, ==, 0);
876
877 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
878 data &= ~LCK_RW_WANT_EXCL;
879 } else {
880 data -= LCK_RW_SHARED_READER;
881 }
882
883 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
884 break;
885 }
886
887 cpu_pause();
888 }
889
890 return;
891 }
892
893 static boolean_t
894 lck_rw_lock_exclusive_gen(
895 lck_rw_t *lock,
896 bool (^lock_pause)(void))
897 {
898 __assert_only thread_t self = current_thread();
899 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
900 lck_rw_word_t word;
901 int slept = 0;
902 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
903 lck_rw_drain_state_t drain_state = LCK_RW_DRAIN_S_NOT_DRAINED;
904 wait_result_t res = 0;
905 boolean_t istate;
906
907 #if CONFIG_DTRACE
908 boolean_t dtrace_ls_initialized = FALSE;
909 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
910 uint64_t wait_interval = 0;
911 int readers_at_sleep = 0;
912 #endif
913
914 assertf(lock->lck_rw_owner != self->ctid,
915 "Lock already held state=0x%x, owner=%p",
916 ordered_load_rw(lock), self);
917
918 #ifdef DEBUG_RW
919 /*
920 * Best effort attempt to check that this thread
921 * is not already holding the lock (this checks read mode too).
922 */
923 assert_canlock_rwlock(lock, self, LCK_RW_TYPE_EXCLUSIVE);
924 #endif /* DEBUG_RW */
925
926 /*
927 * Try to acquire the lck_rw_want_excl bit.
928 */
929 while (lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL, NULL) != LCK_RW_GRAB_S_LOCKED) {
930 #if CONFIG_DTRACE
931 if (dtrace_ls_initialized == FALSE) {
932 dtrace_ls_initialized = TRUE;
933 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
934 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
935 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
936 if (dtrace_ls_enabled) {
937 /*
938 * Either sleeping or spinning is happening,
939 * start a timing of our delay interval now.
940 */
941 readers_at_sleep = lock->lck_rw_shared_count;
942 wait_interval = mach_absolute_time();
943 }
944 }
945 #endif
946
947 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START,
948 trace_lck, 0, 0, 0, 0);
949
950 grab_state = lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT, lock_pause);
951
952 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END,
953 trace_lck, 0, 0, grab_state, 0);
954
955 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
956 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
957 break;
958 }
959 /*
960 * if we get here, the deadline has expired w/o us
961 * being able to grab the lock exclusively
962 * check to see if we're allowed to do a thread_block
963 */
964 word.data = ordered_load_rw(lock);
965 if (word.can_sleep) {
966 istate = lck_interlock_lock(lock);
967 word.data = ordered_load_rw(lock);
968
969 if (word.want_excl) {
970 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
971
972 word.w_waiting = 1;
973 ordered_store_rw(lock, word.data);
974
975 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
976 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
977 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
978 lck_interlock_unlock(lock, istate);
979 if (res == THREAD_WAITING) {
980 res = thread_block(THREAD_CONTINUE_NULL);
981 slept++;
982 }
983 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
984 } else {
985 word.want_excl = 1;
986 ordered_store_rw(lock, word.data);
987 lck_interlock_unlock(lock, istate);
988 break;
989 }
990 }
991 }
992
993 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
994 assert(lock_pause);
995 return FALSE;
996 }
997
998 /*
999 * Wait for readers (and upgrades) to finish...
1000 */
1001 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
1002 #if CONFIG_DTRACE
1003 /*
1004 * Either sleeping or spinning is happening, start
1005 * a timing of our delay interval now. If we set it
1006 * to -1 we don't have accurate data so we cannot later
1007 * decide to record a dtrace spin or sleep event.
1008 */
1009 if (dtrace_ls_initialized == FALSE) {
1010 dtrace_ls_initialized = TRUE;
1011 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1012 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1013 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1014 if (dtrace_ls_enabled) {
1015 /*
1016 * Either sleeping or spinning is happening,
1017 * start a timing of our delay interval now.
1018 */
1019 readers_at_sleep = lock->lck_rw_shared_count;
1020 wait_interval = mach_absolute_time();
1021 }
1022 }
1023 #endif
1024
1025 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1026
1027 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE, lock_pause);
1028
1029 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, drain_state, 0);
1030
1031 if (drain_state == LCK_RW_DRAIN_S_DRAINED ||
1032 drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1033 break;
1034 }
1035 /*
1036 * if we get here, the deadline has expired w/o us
1037 * being able to grab the lock exclusively
1038 * check to see if we're allowed to do a thread_block
1039 */
1040 word.data = ordered_load_rw(lock);
1041 if (word.can_sleep) {
1042 istate = lck_interlock_lock(lock);
1043 word.data = ordered_load_rw(lock);
1044
1045 if (word.shared_count != 0 || word.want_upgrade) {
1046 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1047
1048 word.w_waiting = 1;
1049 ordered_store_rw(lock, word.data);
1050
1051 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1052 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1053 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1054 lck_interlock_unlock(lock, istate);
1055
1056 if (res == THREAD_WAITING) {
1057 res = thread_block(THREAD_CONTINUE_NULL);
1058 slept++;
1059 }
1060 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1061 } else {
1062 lck_interlock_unlock(lock, istate);
1063 /*
1064 * must own the lock now, since we checked for
1065 * readers or upgrade owner behind the interlock
1066 * no need for a call to 'lck_rw_drain_status'
1067 */
1068 break;
1069 }
1070 }
1071 }
1072
1073 #if CONFIG_DTRACE
1074 /*
1075 * Decide what latencies we suffered that are Dtrace events.
1076 * If we have set wait_interval, then we either spun or slept.
1077 * At least we get out from under the interlock before we record
1078 * which is the best we can do here to minimize the impact
1079 * of the tracing.
1080 * If we have set wait_interval to -1, then dtrace was not enabled when we
1081 * started sleeping/spinning so we don't record this event.
1082 */
1083 if (dtrace_ls_enabled == TRUE) {
1084 if (slept == 0) {
1085 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1086 mach_absolute_time() - wait_interval, 1);
1087 } else {
1088 /*
1089 * For the blocking case, we also record whether, when we blocked,
1090 * it was held for read or write, and how many readers there were.
1091 * Notice that above we recorded this before we dropped
1092 * the interlock so the count is accurate.
1093 */
1094 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1095 mach_absolute_time() - wait_interval, 1,
1096 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1097 }
1098 }
1099 #endif /* CONFIG_DTRACE */
1100
1101 if (drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1102 lck_rw_drop(lock, LCK_RW_GRAB_F_WANT_EXCL);
1103 assert(lock_pause);
1104 return FALSE;
1105 }
1106
1107 #if CONFIG_DTRACE
1108 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1109 #endif /* CONFIG_DTRACE */
1110
1111 return TRUE;
1112 }
1113
1114 static inline void
1115 lck_rw_lock_check_preemption(lck_rw_t *lock __unused)
1116 {
1117 assertf((get_preemption_level() == 0 && ml_get_interrupts_enabled()) ||
1118 startup_phase < STARTUP_SUB_EARLY_BOOT ||
1119 current_cpu_datap()->cpu_hibernate ||
1120 ml_is_quiescing() ||
1121 !not_in_kdp,
1122 "%s: attempt to take rwlock %p in non-preemptible or interrupt context: "
1123 "preemption level = %d, interruptible = %d", __func__, lock,
1124 get_preemption_level(), (int)ml_get_interrupts_enabled());
1125 }
1126
1127 #define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
1128 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
1129 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
1130 /*!
1131 * @function lck_rw_lock_exclusive_check_contended
1132 *
1133 * @abstract
1134 * Locks a rw_lock in exclusive mode.
1135 *
1136 * @discussion
1137 * This routine IS EXPERIMENTAL.
1138 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
1139 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
1140 *
1141 * @param lock rw_lock to lock.
1142 *
1143 * @returns Returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
1144 * otherwise.
1145 */
1146 bool
1147 lck_rw_lock_exclusive_check_contended(
1148 lck_rw_t *lock)
1149 {
1150 thread_t thread = current_thread();
1151 bool contended = false;
1152
1153 if (lock->lck_rw_can_sleep) {
1154 lck_rw_lock_check_preemption(lock);
1155 lck_rw_lock_count_inc(thread, lock);
1156 } else if (get_preemption_level() == 0) {
1157 panic("Taking non-sleepable RW lock with preemption enabled");
1158 }
1159
1160 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1161 #if CONFIG_DTRACE
1162 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1163 #endif /* CONFIG_DTRACE */
1164 } else {
1165 contended = true;
1166 (void) lck_rw_lock_exclusive_gen(lock, NULL);
1167 }
1168 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1169 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1170 ordered_store_rw_owner(lock, thread->ctid);
1171
1172 #ifdef DEBUG_RW
1173 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
1174 #endif /* DEBUG_RW */
1175 return contended;
1176 }
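/*
 * Illustrative sketch (not part of the build): consuming the contention
 * hint.  As stated above this interface is experimental and supported
 * only for the vm object lock; "contended_count" is a hypothetical
 * counter used purely for illustration.
 *
 *	if (lck_rw_lock_exclusive_check_contended(lock)) {
 *		contended_count++;	    (we spun or blocked on the way in)
 *	}
 *	... exclusive critical section ...
 *	lck_rw_unlock_exclusive(lock);
 */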
1177
1178 __attribute__((always_inline))
1179 static boolean_t
1180 lck_rw_lock_exclusive_internal_inline(
1181 lck_rw_t *lock,
1182 void *caller,
1183 bool (^lock_pause)(void))
1184 {
1185 #pragma unused(caller)
1186 thread_t thread = current_thread();
1187
1188 if (lock->lck_rw_can_sleep) {
1189 lck_rw_lock_check_preemption(lock);
1190 lck_rw_lock_count_inc(thread, lock);
1191 } else if (get_preemption_level() == 0) {
1192 panic("Taking non-sleepable RW lock with preemption enabled");
1193 }
1194
1195 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1196 #if CONFIG_DTRACE
1197 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1198 #endif /* CONFIG_DTRACE */
1199 } else if (!lck_rw_lock_exclusive_gen(lock, lock_pause)) {
1200 /*
1201 * lck_rw_lock_exclusive_gen() should only return
1202 * early if lock_pause has been passed and
1203 * returns FALSE. lock_pause is exclusive with
1204 * lck_rw_can_sleep().
1205 */
1206 assert(!lock->lck_rw_can_sleep);
1207 return FALSE;
1208 }
1209
1210 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1211 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1212 ordered_store_rw_owner(lock, thread->ctid);
1213
1214 #if DEBUG_RW
1215 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1216 #endif /* DEBUG_RW */
1217
1218 return TRUE;
1219 }
1220
1221 __attribute__((noinline))
1222 static void
1223 lck_rw_lock_exclusive_internal(
1224 lck_rw_t *lock,
1225 void *caller)
1226 {
1227 (void) lck_rw_lock_exclusive_internal_inline(lock, caller, NULL);
1228 }
1229
1230 /*!
1231 * @function lck_rw_lock_exclusive
1232 *
1233 * @abstract
1234 * Locks a rw_lock in exclusive mode.
1235 *
1236 * @discussion
1237 * This function can block.
1238 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1239 * can acquire it in exclusive mode.
1240 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1241 *
1242 * @param lock rw_lock to lock.
1243 */
1244 void
1245 lck_rw_lock_exclusive(
1246 lck_rw_t *lock)
1247 {
1248 (void) lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), NULL);
1249 }
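/*
 * Illustrative sketch (not part of the build): take the lock exclusive
 * to mutate the protected state, then downgrade and keep reading.
 * lck_rw_lock_exclusive_to_shared() is the downgrade path provided
 * elsewhere in the rw lock API.
 *
 *	lck_rw_lock_exclusive(lock);
 *	... modify the protected data ...
 *	lck_rw_lock_exclusive_to_shared(lock);
 *	... continue reading the protected data ...
 *	lck_rw_unlock_shared(lock);
 */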
1250
1251 /*!
1252 * @function lck_rw_lock_exclusive_b
1253 *
1254 * @abstract
1255 * Locks a rw_lock in exclusive mode. Returns early if the lock can't be acquired
1256 * and the specified block returns true.
1257 *
1258 * @discussion
1259 * Identical to lck_rw_lock_exclusive() but can return early if the lock can't be
1260 * acquired and the specified block returns true. The block is called
1261 * repeatedly when waiting to acquire the lock.
1262 * Should only be called when the lock cannot sleep (i.e. when
1263 * lock->lck_rw_can_sleep is false).
1264 *
1265 * @param lock rw_lock to lock.
1266 * @param lock_pause block invoked while waiting to acquire lock
1267 *
1268 * @returns Returns TRUE if the lock is successfully taken,
1269 * FALSE if the block returns true and the lock has
1270 * not been acquired.
1271 */
1272 boolean_t
1273 lck_rw_lock_exclusive_b(
1274 lck_rw_t *lock,
1275 bool (^lock_pause)(void))
1276 {
1277 assert(!lock->lck_rw_can_sleep);
1278
1279 return lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), lock_pause);
1280 }
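/*
 * Illustrative sketch (not part of the build): bounded acquisition of a
 * non-sleepable lock (lck_rw_can_sleep must be false, as it is for the
 * vm object lock).  "stop_requested" is a hypothetical abort flag.
 *
 *	boolean_t locked = lck_rw_lock_exclusive_b(lock, ^bool (void) {
 *		return os_atomic_load(&stop_requested, relaxed);
 *	});
 *	if (!locked) {
 *		return;		    (lock_pause fired first; the lock was NOT taken)
 *	}
 *	... exclusive critical section ...
 *	lck_rw_unlock_exclusive(lock);
 */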
1281
1282 /*
1283 * Routine: lck_rw_lock_shared_gen
1284 * Function:
1285 * Fast path code has determined that this lock
1286 * is held exclusively... this is where we spin/block
1287 * until we can acquire the lock in the shared mode
1288 */
1289 static boolean_t
1290 lck_rw_lock_shared_gen(
1291 lck_rw_t *lck,
1292 bool (^lock_pause)(void))
1293 {
1294 __assert_only thread_t self = current_thread();
1295 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1296 lck_rw_word_t word;
1297 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
1298 int slept = 0;
1299 wait_result_t res = 0;
1300 boolean_t istate;
1301
1302 #if CONFIG_DTRACE
1303 uint64_t wait_interval = 0;
1304 int readers_at_sleep = 0;
1305 boolean_t dtrace_ls_initialized = FALSE;
1306 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1307 #endif /* CONFIG_DTRACE */
1308
1309 assertf(lck->lck_rw_owner != self->ctid,
1310 "Lock already held state=0x%x, owner=%p",
1311 ordered_load_rw(lck), self);
1312
1313 #ifdef DEBUG_RW
1314 /*
1315 * Best effort attempt to check that this thread
1316 * is not already holding the lock in shared mode.
1317 */
1318 assert_canlock_rwlock(lck, self, LCK_RW_TYPE_SHARED);
1319 #endif
1320
1321 while (lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED, NULL) != LCK_RW_GRAB_S_LOCKED) {
1322 #if CONFIG_DTRACE
1323 if (dtrace_ls_initialized == FALSE) {
1324 dtrace_ls_initialized = TRUE;
1325 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1326 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1327 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1328 if (dtrace_ls_enabled) {
1329 /*
1330 * Either sleeping or spinning is happening,
1331 * start a timing of our delay interval now.
1332 */
1333 readers_at_sleep = lck->lck_rw_shared_count;
1334 wait_interval = mach_absolute_time();
1335 }
1336 }
1337 #endif
1338
1339 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1340 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
1341
1342 grab_state = lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED | LCK_RW_GRAB_F_WAIT, lock_pause);
1343
1344 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1345 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, grab_state, 0);
1346
1347 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
1348 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1349 break;
1350 }
1351
1352 /*
1353 * if we get here, the deadline has expired w/o us
1354 * being able to grab the lock for read
1355 * check to see if we're allowed to do a thread_block
1356 */
1357 if (lck->lck_rw_can_sleep) {
1358 istate = lck_interlock_lock(lck);
1359
1360 word.data = ordered_load_rw(lck);
1361 if ((word.want_excl || word.want_upgrade) &&
1362 ((word.shared_count == 0) || word.priv_excl)) {
1363 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1364 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
1365
1366 word.r_waiting = 1;
1367 ordered_store_rw(lck, word.data);
1368
1369 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
1370 res = assert_wait(LCK_RW_READER_EVENT(lck),
1371 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1372 lck_interlock_unlock(lck, istate);
1373
1374 if (res == THREAD_WAITING) {
1375 res = thread_block(THREAD_CONTINUE_NULL);
1376 slept++;
1377 }
1378 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1379 trace_lck, res, slept, 0, 0);
1380 } else {
1381 word.shared_count++;
1382 ordered_store_rw(lck, word.data);
1383 lck_interlock_unlock(lck, istate);
1384 break;
1385 }
1386 }
1387 }
1388
1389 #if CONFIG_DTRACE
1390 if (dtrace_ls_enabled == TRUE) {
1391 if (slept == 0) {
1392 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1393 } else {
1394 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1395 mach_absolute_time() - wait_interval, 0,
1396 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1397 }
1398 }
1399 #endif /* CONFIG_DTRACE */
1400
1401 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1402 assert(lock_pause);
1403 return FALSE;
1404 }
1405
1406 #if CONFIG_DTRACE
1407 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1408 #endif /* CONFIG_DTRACE */
1409
1410 return TRUE;
1411 }
1412
1413 __attribute__((always_inline))
1414 static boolean_t
1415 lck_rw_lock_shared_internal_inline(
1416 lck_rw_t *lock,
1417 void *caller,
1418 bool (^lock_pause)(void))
1419 {
1420 #pragma unused(caller)
1421
1422 uint32_t data, prev;
1423 thread_t thread = current_thread();
1424 #ifdef DEBUG_RW
1425 boolean_t check_canlock = TRUE;
1426 #endif
1427
1428 if (lock->lck_rw_can_sleep) {
1429 lck_rw_lock_check_preemption(lock);
1430 lck_rw_lock_count_inc(thread, lock);
1431 } else if (get_preemption_level() == 0) {
1432 panic("Taking non-sleepable RW lock with preemption enabled");
1433 }
1434
1435 for (;;) {
1436 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1437 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1438 atomic_exchange_abort();
1439 if (!lck_rw_lock_shared_gen(lock, lock_pause)) {
1440 /*
1441 * lck_rw_lock_shared_gen() should only return
1442 * early if lock_pause has been passed and
1443 * returns FALSE. lock_pause is exclusive with
1444 * lck_rw_can_sleep().
1445 */
1446 assert(!lock->lck_rw_can_sleep);
1447 return FALSE;
1448 }
1449
1450 goto locked;
1451 }
1452 #ifdef DEBUG_RW
1453 if ((data & LCK_RW_SHARED_MASK) == 0) {
1454 /*
1455 * If the lock is uncontended,
1456 * we do not need to check if we can lock it
1457 */
1458 check_canlock = FALSE;
1459 }
1460 #endif
1461 data += LCK_RW_SHARED_READER;
1462 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1463 break;
1464 }
1465 cpu_pause();
1466 }
1467 #ifdef DEBUG_RW
1468 if (check_canlock) {
1469 /*
1470 * Best effort attempt to check that this thread
1471 * is not already holding the lock (this checks read mode too).
1472 */
1473 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1474 }
1475 #endif
1476 locked:
1477 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1478 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1479
1480 #if CONFIG_DTRACE
1481 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1482 #endif /* CONFIG_DTRACE */
1483
1484 #ifdef DEBUG_RW
1485 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1486 #endif /* DEBUG_RW */
1487
1488 return TRUE;
1489 }
1490
1491 __attribute__((noinline))
1492 static void
1493 lck_rw_lock_shared_internal(
1494 lck_rw_t *lock,
1495 void *caller)
1496 {
1497 (void) lck_rw_lock_shared_internal_inline(lock, caller, NULL);
1498 }
1499
1500 /*!
1501 * @function lck_rw_lock_shared
1502 *
1503 * @abstract
1504 * Locks a rw_lock in shared mode.
1505 *
1506 * @discussion
1507 * This function can block.
1508 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1509 * can acquire it in exclusive mode.
1510 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
1511 * the lock without waiting.
1512 * If the lock is held in shared mode and there is at least one writer waiting, a reader will wait
1513 * for all the writers to make progress if the lock was initialized with the default settings. If instead
1514 * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
1515 * in shared mode.
1516 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1517 *
1518 * @param lock rw_lock to lock.
1519 */
1520 void
1521 lck_rw_lock_shared(
1522 lck_rw_t *lock)
1523 {
1524 (void) lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), NULL);
1525 }
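/*
 * Illustrative sketch (not part of the build): plain shared acquisition;
 * any number of readers may be inside the critical section concurrently.
 *
 *	lck_rw_lock_shared(lock);
 *	... read-only access to the protected data ...
 *	lck_rw_unlock_shared(lock);
 */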
1526
1527 /*!
1528 * @function lck_rw_lock_shared_b
1529 *
1530 * @abstract
1531 * Locks a rw_lock in shared mode. Returns early if the lock can't be acquired
1532 * and the specified block returns true.
1533 *
1534 * @discussion
1535 * Identical to lck_rw_lock_shared() but can return early if the lock can't be
1536 * acquired and the specified block returns true. The block is called
1537 * repeatedly when waiting to acquire the lock.
1538 * Should only be called when the lock cannot sleep (i.e. when
1539 * lock->lck_rw_can_sleep is false).
1540 *
1541 * @param lock rw_lock to lock.
1542 * @param lock_pause block invoked while waiting to acquire lock
1543 *
1544 * @returns Returns TRUE if the lock is successfully taken,
1545 * FALSE if the block returns true and the lock has
1546 * not been acquired.
1547 */
1548 boolean_t
1549 lck_rw_lock_shared_b(
1550 lck_rw_t *lock,
1551 bool (^lock_pause)(void))
1552 {
1553 assert(!lock->lck_rw_can_sleep);
1554
1555 return lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), lock_pause);
1556 }
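/*
 * Illustrative sketch (not part of the build): as with
 * lck_rw_lock_exclusive_b(), the return value must be checked;
 * "should_yield()" is a hypothetical predicate.
 *
 *	if (lck_rw_lock_shared_b(lock, ^bool (void) { return should_yield(); })) {
 *		... read-only critical section ...
 *		lck_rw_unlock_shared(lock);
 *	}
 */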
1557
1558 /*
1559 * Routine: lck_rw_lock_shared_to_exclusive_failure
1560 * Function:
1561 * Fast path code has already dropped our read
1562 * count and determined that someone else owns 'lck_rw_want_upgrade'
1563 * if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting';
1564 * all we need to do here is determine if a wakeup is needed
1565 */
1566 static boolean_t
1567 lck_rw_lock_shared_to_exclusive_failure(
1568 lck_rw_t *lck,
1569 uint32_t prior_lock_state)
1570 {
1571 thread_t thread = current_thread();
1572
1573 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1574 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1575 /*
1576 * Someone else has requested upgrade.
1577 * Since we've released the read lock, wake
1578 * him up if he's blocked waiting
1579 */
1580 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1581 }
1582
1583 /* Check if dropping the lock means that we need to unpromote */
1584 if (lck->lck_rw_can_sleep) {
1585 lck_rw_lock_count_dec(thread, lck);
1586 }
1587
1588 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1589 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1590
1591 #ifdef DEBUG_RW
1592 remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
1593 #endif /* DEBUG_RW */
1594
1595 return FALSE;
1596 }
1597
1598 /*
1599 * Routine: lck_rw_lock_shared_to_exclusive_success
1600 * Function:
1601 * the fast path code has already dropped our read
1602 * count and successfully acquired 'lck_rw_want_upgrade'
1603 * we just need to wait for the rest of the readers to drain
1604 * and then we can return as the exclusive holder of this lock
1605 */
1606 static void
1607 lck_rw_lock_shared_to_exclusive_success(
1608 lck_rw_t *lock)
1609 {
1610 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1611 int slept = 0;
1612 lck_rw_word_t word;
1613 wait_result_t res;
1614 boolean_t istate;
1615 lck_rw_drain_state_t drain_state;
1616
1617 #if CONFIG_DTRACE
1618 uint64_t wait_interval = 0;
1619 int readers_at_sleep = 0;
1620 boolean_t dtrace_ls_initialized = FALSE;
1621 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1622 #endif
1623
1624 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
1625 word.data = ordered_load_rw(lock);
1626 #if CONFIG_DTRACE
1627 if (dtrace_ls_initialized == FALSE) {
1628 dtrace_ls_initialized = TRUE;
1629 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1630 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1631 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1632 if (dtrace_ls_enabled) {
1633 /*
1634 * Either sleeping or spinning is happening,
1635 * start a timing of our delay interval now.
1636 */
1637 readers_at_sleep = word.shared_count;
1638 wait_interval = mach_absolute_time();
1639 }
1640 }
1641 #endif
1642
1643 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1644 trace_lck, word.shared_count, 0, 0, 0);
1645
1646 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE, NULL);
1647
1648 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1649 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1650
1651 if (drain_state == LCK_RW_DRAIN_S_DRAINED) {
1652 break;
1653 }
1654
1655 /*
1656 * if we get here, the spin deadline in lck_rw_drain_status()
1657 * has expired w/o the rw_shared_count having drained to 0
1658 * check to see if we're allowed to do a thread_block
1659 */
1660 if (word.can_sleep) {
1661 istate = lck_interlock_lock(lock);
1662
1663 word.data = ordered_load_rw(lock);
1664 if (word.shared_count != 0) {
1665 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1666 trace_lck, word.shared_count, 0, 0, 0);
1667
1668 word.w_waiting = 1;
1669 ordered_store_rw(lock, word.data);
1670
1671 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1672 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1673 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1674 lck_interlock_unlock(lock, istate);
1675
1676 if (res == THREAD_WAITING) {
1677 res = thread_block(THREAD_CONTINUE_NULL);
1678 slept++;
1679 }
1680 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1681 trace_lck, res, slept, 0, 0);
1682 } else {
1683 lck_interlock_unlock(lock, istate);
1684 break;
1685 }
1686 }
1687 }
1688 #if CONFIG_DTRACE
1689 /*
1690  * We infer whether we took the sleep or spin path above by checking whether we slept.
1691 */
1692 if (dtrace_ls_enabled == TRUE) {
1693 if (slept == 0) {
1694 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1695 } else {
1696 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1697 mach_absolute_time() - wait_interval, 1,
1698 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1699 }
1700 }
1701 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1702 #endif
1703 }
1704
1705 /*!
1706 * @function lck_rw_lock_shared_to_exclusive
1707 *
1708 * @abstract
1709 * Upgrades a rw_lock held in shared mode to exclusive.
1710 *
1711 * @discussion
1712 * This function can block.
1713  * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails, the function will
1714 * return with the lock not held.
1715 * The caller needs to hold the lock in shared mode to upgrade it.
1716 *
1717 * @param lock rw_lock already held in shared mode to upgrade.
1718 *
1719 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
1720 * If the function was not able to upgrade the lock, the lock will be dropped
1721 * by the function.
1722 */
1723 boolean_t
1724 lck_rw_lock_shared_to_exclusive(
1725 lck_rw_t *lock)
1726 {
1727 thread_t thread = current_thread();
1728 uint32_t data, prev;
1729
1730 assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());
1731
1732 #if DEBUG_RW
1733 assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1734 #endif /* DEBUG_RW */
1735
1736 for (;;) {
1737 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1738 if (data & LCK_RW_INTERLOCK) {
1739 atomic_exchange_abort();
1740 lck_rw_interlock_spin(lock);
1741 continue;
1742 }
1743 if (data & LCK_RW_WANT_UPGRADE) {
1744 data -= LCK_RW_SHARED_READER;
1745 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1746 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1747 }
1748 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1749 return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1750 }
1751 } else {
1752 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1753 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1754 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1755 break;
1756 }
1757 }
1758 cpu_pause();
1759 }
1760 /* we now own the WANT_UPGRADE */
1761 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1762 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1763 }
1764
1765 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1766 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1767
1768 ordered_store_rw_owner(lock, thread->ctid);
1769 #if CONFIG_DTRACE
1770 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1771 #endif /* CONFIG_DTRACE */
1772
1773 #if DEBUG_RW
1774 change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
1775 #endif /* DEBUG_RW */
1776 return TRUE;
1777 }
1778
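/*
 * Illustrative sketch (not part of the original file): a typical caller
 * pattern for lck_rw_lock_shared_to_exclusive().  Because a failed upgrade
 * drops the lock entirely, the caller must re-take it and re-validate any
 * state it examined under the shared hold.  needs_update()/do_update() are
 * hypothetical helpers used only for illustration.
 */
#if 0 /* example only, not compiled */
static void
example_upgrade_pattern(lck_rw_t *lck)
{
	lck_rw_lock_shared(lck);
	if (needs_update()) {
		if (!lck_rw_lock_shared_to_exclusive(lck)) {
			/* upgrade failed: the lock was dropped, re-take it exclusive */
			lck_rw_lock_exclusive(lck);
			if (!needs_update()) {
				/* another thread may have done the work meanwhile */
				lck_rw_unlock_exclusive(lck);
				return;
			}
		}
		do_update();
		lck_rw_unlock_exclusive(lck);
		return;
	}
	lck_rw_unlock_shared(lck);
}
#endif
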
1779 /*
1780 * Routine: lck_rw_lock_exclusive_to_shared_gen
1781 * Function:
1782 * Fast path has already dropped
1783 * our exclusive state and bumped lck_rw_shared_count
1784 * all we need to do here is determine if anyone
1785 * needs to be awakened.
1786 */
1787 static void
1788 lck_rw_lock_exclusive_to_shared_gen(
1789 lck_rw_t *lck,
1790 uint32_t prior_lock_state,
1791 void *caller)
1792 {
1793 #pragma unused(caller)
1794 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1795 lck_rw_word_t fake_lck;
1796
1797 /*
1798 * prior_lock state is a snapshot of the 1st word of the
1799 * lock in question... we'll fake up a pointer to it
1800  * and carefully not access anything beyond what's defined
1801 * in the first word of a lck_rw_t
1802 */
1803 fake_lck.data = prior_lock_state;
1804
1805 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1806 trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1807
1808 /*
1809 * don't wake up anyone waiting to take the lock exclusively
1810 * since we hold a read count... when the read count drops to 0,
1811 * the writers will be woken.
1812 *
1813 * wake up any waiting readers if we don't have any writers waiting,
1814 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1815 */
1816 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1817 thread_wakeup(LCK_RW_READER_EVENT(lck));
1818 }
1819
1820 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1821 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1822
1823 #if CONFIG_DTRACE
1824 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1825 #endif
1826
1827 #if DEBUG_RW
1828 thread_t thread = current_thread();
1829 change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1830 #endif /* DEBUG_RW */
1831 }
1832
1833 /*!
1834 * @function lck_rw_lock_exclusive_to_shared
1835 *
1836 * @abstract
1837 * Downgrades a rw_lock held in exclusive mode to shared.
1838 *
1839 * @discussion
1840 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
1841 *
1842 * @param lock rw_lock already held in exclusive mode to downgrade.
1843 */
1844 void
1845 lck_rw_lock_exclusive_to_shared(
1846 lck_rw_t *lock)
1847 {
1848 uint32_t data, prev;
1849
1850 assertf(lock->lck_rw_owner == current_thread()->ctid,
1851 "state=0x%x, owner=%p", lock->lck_rw_data,
1852 ctid_get_thread_unsafe(lock->lck_rw_owner));
1853 ordered_store_rw_owner(lock, 0);
1854
1855 for (;;) {
1856 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1857 if (data & LCK_RW_INTERLOCK) {
1858 atomic_exchange_abort();
1859 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1860 continue;
1861 }
1862 data += LCK_RW_SHARED_READER;
1863 if (data & LCK_RW_WANT_UPGRADE) {
1864 data &= ~(LCK_RW_WANT_UPGRADE);
1865 } else {
1866 data &= ~(LCK_RW_WANT_EXCL);
1867 }
1868 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
1869 data &= ~(LCK_RW_W_WAITING);
1870 }
1871 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1872 break;
1873 }
1874 cpu_pause();
1875 }
1876 lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
1877 }
1878
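/*
 * Illustrative sketch (not part of the original file): a typical downgrade
 * pattern.  The writer publishes its update while exclusive, then downgrades
 * so other readers can proceed while it keeps a consistent read-only view.
 * publish_update()/read_back() are hypothetical helpers.
 */
#if 0 /* example only, not compiled */
static void
example_downgrade_pattern(lck_rw_t *lck)
{
	lck_rw_lock_exclusive(lck);
	publish_update();                       /* mutate protected state */
	lck_rw_lock_exclusive_to_shared(lck);   /* allow readers in */
	read_back();                            /* read-only work */
	lck_rw_unlock_shared(lck);
}
#endif
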
1879 /*
1880 * Very sad hack, but the codegen for lck_rw_lock
1881 * is very unhappy with the combination of __builtin_return_address()
1882 * and a noreturn function. For some reason it adds more frames
1883 * than it should. rdar://76570684
1884 */
1885 void
1886 _lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
1887 #pragma clang diagnostic push
1888 #pragma clang diagnostic ignored "-Wmissing-noreturn"
1889 __attribute__((noinline, weak))
1890 void
1891 _lck_rw_lock_type_panic(
1892 lck_rw_t *lck,
1893 lck_rw_type_t lck_rw_type)
1894 {
1895 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
1896 }
1897 #pragma clang diagnostic pop
1898
1899 /*!
1900 * @function lck_rw_lock
1901 *
1902 * @abstract
1903 * Locks a rw_lock with the specified type.
1904 *
1905 * @discussion
1906 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1907 *
1908 * @param lck rw_lock to lock.
1909 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1910 */
1911 void
1912 lck_rw_lock(
1913 lck_rw_t *lck,
1914 lck_rw_type_t lck_rw_type)
1915 {
1916 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1917 return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
1918 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1919 return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
1920 }
1921 _lck_rw_lock_type_panic(lck, lck_rw_type);
1922 }
1923
1924 __attribute__((always_inline))
1925 static boolean_t
1926 lck_rw_try_lock_shared_internal_inline(
1927 lck_rw_t *lock,
1928 void *caller)
1929 {
1930 #pragma unused(caller)
1931
1932 uint32_t data, prev;
1933 thread_t thread = current_thread();
1934 #ifdef DEBUG_RW
1935 boolean_t check_canlock = TRUE;
1936 #endif
1937
1938 for (;;) {
1939 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1940 if (data & LCK_RW_INTERLOCK) {
1941 atomic_exchange_abort();
1942 lck_rw_interlock_spin(lock);
1943 continue;
1944 }
1945 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1946 atomic_exchange_abort();
1947 return FALSE; /* lock is busy */
1948 }
1949 #ifdef DEBUG_RW
1950 if ((data & LCK_RW_SHARED_MASK) == 0) {
1951 /*
1952 * If the lock is uncontended,
1953 * we do not need to check if we can lock it
1954 */
1955 check_canlock = FALSE;
1956 }
1957 #endif
1958 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1959 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1960 break;
1961 }
1962 cpu_pause();
1963 }
1964 #ifdef DEBUG_RW
1965 if (check_canlock) {
1966 /*
1967 * Best effort attempt to check that this thread
1968 * is not already holding the lock (this checks read mode too).
1969 */
1970 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1971 }
1972 #endif
1973 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1974 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1975
1976 if (lock->lck_rw_can_sleep) {
1977 lck_rw_lock_count_inc(thread, lock);
1978 } else if (get_preemption_level() == 0) {
1979 panic("Taking non-sleepable RW lock with preemption enabled");
1980 }
1981
1982 #if CONFIG_DTRACE
1983 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1984 #endif /* CONFIG_DTRACE */
1985
1986 #ifdef DEBUG_RW
1987 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1988 #endif /* DEBUG_RW */
1989 return TRUE;
1990 }
1991
1992 __attribute__((noinline))
1993 static boolean_t
1994 lck_rw_try_lock_shared_internal(
1995 lck_rw_t *lock,
1996 void *caller)
1997 {
1998 return lck_rw_try_lock_shared_internal_inline(lock, caller);
1999 }
2000
2001 /*!
2002 * @function lck_rw_try_lock_shared
2003 *
2004 * @abstract
2005  * Tries to lock a rw_lock in read mode.
2006 *
2007 * @discussion
2008  * This function will return immediately and not block if the lock cannot be acquired in shared mode.
2009 * See lck_rw_lock_shared for more details.
2010 *
2011 * @param lock rw_lock to lock.
2012 *
2013  * @returns TRUE if the lock is successfully acquired, FALSE if the lock is held or wanted in exclusive mode, or an upgrade is in progress.
2014 */
2015 boolean_t
2016 lck_rw_try_lock_shared(
2017 lck_rw_t *lock)
2018 {
2019 return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
2020 }
2021
2022 __attribute__((always_inline))
2023 static boolean_t
2024 lck_rw_try_lock_exclusive_internal_inline(
2025 lck_rw_t *lock,
2026 void *caller)
2027 {
2028 #pragma unused(caller)
2029 uint32_t data, prev;
2030
2031 for (;;) {
2032 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
2033 if (data & LCK_RW_INTERLOCK) {
2034 atomic_exchange_abort();
2035 lck_rw_interlock_spin(lock);
2036 continue;
2037 }
2038 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
2039 atomic_exchange_abort();
2040 return FALSE;
2041 }
2042 data |= LCK_RW_WANT_EXCL;
2043 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
2044 break;
2045 }
2046 cpu_pause();
2047 }
2048 thread_t thread = current_thread();
2049
2050 if (lock->lck_rw_can_sleep) {
2051 lck_rw_lock_count_inc(thread, lock);
2052 } else if (get_preemption_level() == 0) {
2053 panic("Taking non-sleepable RW lock with preemption enabled");
2054 }
2055
2056 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
2057 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
2058
2059 ordered_store_rw_owner(lock, thread->ctid);
2060 #if CONFIG_DTRACE
2061 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
2062 #endif /* CONFIG_DTRACE */
2063
2064 #ifdef DEBUG_RW
2065 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
2066 #endif /* DEBUG_RW */
2067 return TRUE;
2068 }
2069
2070 __attribute__((noinline))
2071 static boolean_t
2072 lck_rw_try_lock_exclusive_internal(
2073 lck_rw_t *lock,
2074 void *caller)
2075 {
2076 return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
2077 }
2078
2079 /*!
2080 * @function lck_rw_try_lock_exclusive
2081 *
2082 * @abstract
2083  * Tries to lock a rw_lock in write mode.
2084 *
2085 * @discussion
2086 * This function will return and not block in case the lock is already held.
2087 * See lck_rw_lock_exclusive for more details.
2088 *
2089 * @param lock rw_lock to lock.
2090 *
2091 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
2092 */
2093 boolean_t
2094 lck_rw_try_lock_exclusive(
2095 lck_rw_t *lock)
2096 {
2097 return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
2098 }
2099
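/*
 * Illustrative sketch (not part of the original file): try-lock as a
 * deadlock-avoidance tool when a lock has to be taken out of the usual
 * order.  On failure the caller backs off instead of blocking.
 * defer_work() is a hypothetical helper.
 */
#if 0 /* example only, not compiled */
static void
example_trylock_pattern(lck_rw_t *lck)
{
	if (!lck_rw_try_lock_exclusive(lck)) {
		/* lock is busy: do not block here, retry later */
		defer_work();
		return;
	}
	/* ... update protected state ... */
	lck_rw_unlock_exclusive(lck);
}
#endif
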
2100 /*
2101 * Very sad hack, but the codegen for lck_rw_try_lock
2102 * is very unhappy with the combination of __builtin_return_address()
2103 * and a noreturn function. For some reason it adds more frames
2104 * than it should. rdar://76570684
2105 */
2106 boolean_t
2107 _lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
2108 #pragma clang diagnostic push
2109 #pragma clang diagnostic ignored "-Wmissing-noreturn"
2110 __attribute__((noinline, weak))
2111 boolean_t
2112 _lck_rw_try_lock_type_panic(
2113 lck_rw_t *lck,
2114 lck_rw_type_t lck_rw_type)
2115 {
2116 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
2117 }
2118 #pragma clang diagnostic pop
2119
2120 /*!
2121 * @function lck_rw_try_lock
2122 *
2123 * @abstract
2124  * Tries to lock a rw_lock with the specified type.
2125 *
2126 * @discussion
2127 * This function will return and not wait/block in case the lock is already held.
2128 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
2129 *
2130 * @param lck rw_lock to lock.
2131 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2132 *
2133 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
2134 */
2135 boolean_t
2136 lck_rw_try_lock(
2137 lck_rw_t *lck,
2138 lck_rw_type_t lck_rw_type)
2139 {
2140 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2141 return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
2142 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2143 return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
2144 }
2145 return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
2146 }
2147
2148 /*
2149 * Routine: lck_rw_done_gen
2150 *
2151 * prior_lock_state is the value in the 1st
2152 * word of the lock at the time of a successful
2153 * atomic compare and exchange with the new value...
2154 * it represents the state of the lock before we
2155 * decremented the rw_shared_count or cleared either
2156 * rw_want_upgrade or rw_want_write and
2157 * the lck_x_waiting bits... since the wrapper
2158 * routine has already changed the state atomically,
2159 * we just need to decide if we should
2160 * wake up anyone and what value to return... we do
2161 * this by examining the state of the lock before
2162 * we changed it
2163 */
2164 static lck_rw_type_t
2165 lck_rw_done_gen(
2166 lck_rw_t *lck,
2167 uint32_t prior_lock_state)
2168 {
2169 lck_rw_word_t fake_lck;
2170 lck_rw_type_t lock_type;
2171 thread_t thread;
2172
2173 /*
2174 * prior_lock state is a snapshot of the 1st word of the
2175 * lock in question... we'll fake up a pointer to it
2176  * and carefully not access anything beyond what's defined
2177 * in the first word of a lck_rw_t
2178 */
2179 fake_lck.data = prior_lock_state;
2180
2181 if (fake_lck.shared_count <= 1) {
2182 if (fake_lck.w_waiting) {
2183 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2184 }
2185
2186 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
2187 thread_wakeup(LCK_RW_READER_EVENT(lck));
2188 }
2189 }
2190 if (fake_lck.shared_count) {
2191 lock_type = LCK_RW_TYPE_SHARED;
2192 } else {
2193 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2194 }
2195
2196 /* Check if dropping the lock means that we need to unpromote */
2197 thread = current_thread();
2198 if (fake_lck.can_sleep) {
2199 lck_rw_lock_count_dec(thread, lck);
2200 }
2201
2202 #if CONFIG_DTRACE
2203 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2204 #endif
2205
2206 #ifdef DEBUG_RW
2207 remove_held_rwlock(lck, thread, lock_type);
2208 #endif /* DEBUG_RW */
2209 return lock_type;
2210 }
2211
2212 /*!
2213 * @function lck_rw_done
2214 *
2215 * @abstract
2216 * Force unlocks a rw_lock without consistency checks.
2217 *
2218 * @discussion
2219 * Do not use unless sure you can avoid consistency checks.
2220 *
2221 * @param lock rw_lock to unlock.
2222 */
2223 lck_rw_type_t
2224 lck_rw_done(
2225 lck_rw_t *lock)
2226 {
2227 uint32_t data, prev;
2228 boolean_t once = FALSE;
2229
2230 #ifdef DEBUG_RW
2231 /*
2232 * Best effort attempt to check that this thread
2233 * is holding the lock.
2234 */
2235 thread_t thread = current_thread();
2236 assert_held_rwlock(lock, thread, 0);
2237 #endif /* DEBUG_RW */
2238 for (;;) {
2239 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2240 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2241 atomic_exchange_abort();
2242 lck_rw_interlock_spin(lock);
2243 continue;
2244 }
2245 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2246 assertf(lock->lck_rw_owner == 0,
2247 "state=0x%x, owner=%p", lock->lck_rw_data,
2248 ctid_get_thread_unsafe(lock->lck_rw_owner));
2249 data -= LCK_RW_SHARED_READER;
2250 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
2251 goto check_waiters;
2252 }
2253 } else { /* if reader count == 0, must be exclusive lock */
2254 if (data & LCK_RW_WANT_UPGRADE) {
2255 data &= ~(LCK_RW_WANT_UPGRADE);
2256 } else {
2257 if (data & LCK_RW_WANT_EXCL) {
2258 data &= ~(LCK_RW_WANT_EXCL);
2259 } else { /* lock is not 'owned', panic */
2260 panic("Releasing non-exclusive RW lock without a reader refcount!");
2261 }
2262 }
2263 if (!once) {
2264 // Only check for holder and clear it once
2265 assertf(lock->lck_rw_owner == current_thread()->ctid,
2266 "state=0x%x, owner=%p", lock->lck_rw_data,
2267 ctid_get_thread_unsafe(lock->lck_rw_owner));
2268 ordered_store_rw_owner(lock, 0);
2269 once = TRUE;
2270 }
2271 check_waiters:
2272 /*
2273 * test the original values to match what
2274 * lck_rw_done_gen is going to do to determine
2275 * which wakeups need to happen...
2276 *
2277 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2278 */
2279 if (prev & LCK_RW_W_WAITING) {
2280 data &= ~(LCK_RW_W_WAITING);
2281 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
2282 data &= ~(LCK_RW_R_WAITING);
2283 }
2284 } else {
2285 data &= ~(LCK_RW_R_WAITING);
2286 }
2287 }
2288 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
2289 break;
2290 }
2291 cpu_pause();
2292 }
2293 return lck_rw_done_gen(lock, prev);
2294 }
2295
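/*
 * Illustrative sketch (not part of the original file): lck_rw_done() is
 * handy when the caller no longer knows whether it holds the lock in shared
 * or exclusive mode, e.g. after a conditional upgrade.  The return value
 * reports which mode was actually released.
 */
#if 0 /* example only, not compiled */
static void
example_done_pattern(lck_rw_t *lck, bool need_excl)
{
	lck_rw_lock_shared(lck);
	if (need_excl && !lck_rw_lock_shared_to_exclusive(lck)) {
		/* upgrade failed: lock was dropped, take it exclusive */
		lck_rw_lock_exclusive(lck);
	}
	/* ... work in whichever mode is now held ... */
	(void)lck_rw_done(lck);         /* releases shared or exclusive */
}
#endif
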
2296 /*!
2297 * @function lck_rw_unlock_shared
2298 *
2299 * @abstract
2300 * Unlocks a rw_lock previously locked in shared mode.
2301 *
2302 * @discussion
2303 * The same thread that locked the lock needs to unlock it.
2304 *
2305 * @param lck rw_lock held in shared mode to unlock.
2306 */
2307 void
2308 lck_rw_unlock_shared(
2309 lck_rw_t *lck)
2310 {
2311 lck_rw_type_t ret;
2312
2313 assertf(lck->lck_rw_owner == 0,
2314 "state=0x%x, owner=%p", lck->lck_rw_data,
2315 ctid_get_thread_unsafe(lck->lck_rw_owner));
2316 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
2317 ret = lck_rw_done(lck);
2318
2319 if (ret != LCK_RW_TYPE_SHARED) {
2320 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
2321 }
2322 }
2323
2324 /*!
2325 * @function lck_rw_unlock_exclusive
2326 *
2327 * @abstract
2328 * Unlocks a rw_lock previously locked in exclusive mode.
2329 *
2330 * @discussion
2331 * The same thread that locked the lock needs to unlock it.
2332 *
2333 * @param lck rw_lock held in exclusive mode to unlock.
2334 */
2335 void
2336 lck_rw_unlock_exclusive(
2337 lck_rw_t *lck)
2338 {
2339 lck_rw_type_t ret;
2340
2341 assertf(lck->lck_rw_owner == current_thread()->ctid,
2342 "state=0x%x, owner=%p", lck->lck_rw_data,
2343 ctid_get_thread_unsafe(lck->lck_rw_owner));
2344 ret = lck_rw_done(lck);
2345
2346 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
2347 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
2348 }
2349 }
2350
2351 /*!
2352 * @function lck_rw_unlock
2353 *
2354 * @abstract
2355 * Unlocks a rw_lock previously locked with lck_rw_type.
2356 *
2357 * @discussion
2358 * The lock must be unlocked by the same thread it was locked from.
2359  * The type of the lock/unlock has to match, unless an upgrade/downgrade was performed while
2360 * holding the lock.
2361 *
2362 * @param lck rw_lock to unlock.
2363 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2364 */
2365 void
2366 lck_rw_unlock(
2367 lck_rw_t *lck,
2368 lck_rw_type_t lck_rw_type)
2369 {
2370 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2371 lck_rw_unlock_shared(lck);
2372 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2373 lck_rw_unlock_exclusive(lck);
2374 } else {
2375 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
2376 }
2377 }
2378
2379 /*!
2380 * @function lck_rw_assert
2381 *
2382 * @abstract
2383 * Asserts the rw_lock is held.
2384 *
2385 * @discussion
2386 * read-write locks do not have a concept of ownership when held in shared mode,
2387 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
2388 * However if rw_lock_debug is on, a best effort mechanism to track the owners is in place, and
2389 * this function can be more accurate.
2390 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD
2391 * LCK_RW_ASSERT_NOTHELD.
2392 *
2393 * @param lck rw_lock to check.
2394 * @param type assert type
2395 */
2396 void
2397 lck_rw_assert(
2398 lck_rw_t *lck,
2399 unsigned int type)
2400 {
2401 thread_t thread = current_thread();
2402
2403 switch (type) {
2404 case LCK_RW_ASSERT_SHARED:
2405 if ((lck->lck_rw_shared_count != 0) &&
2406 (lck->lck_rw_owner == 0)) {
2407 #if DEBUG_RW
2408 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2409 #endif /* DEBUG_RW */
2410 return;
2411 }
2412 break;
2413 case LCK_RW_ASSERT_EXCLUSIVE:
2414 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2415 (lck->lck_rw_shared_count == 0) &&
2416 (lck->lck_rw_owner == thread->ctid)) {
2417 #if DEBUG_RW
2418 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2419 #endif /* DEBUG_RW */
2420 return;
2421 }
2422 break;
2423 case LCK_RW_ASSERT_HELD:
2424 if (lck->lck_rw_shared_count != 0) {
2425 #if DEBUG_RW
2426 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2427 #endif /* DEBUG_RW */
2428 return; // Held shared
2429 }
2430 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2431 (lck->lck_rw_owner == thread->ctid)) {
2432 #if DEBUG_RW
2433 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2434 #endif /* DEBUG_RW */
2435 return; // Held exclusive
2436 }
2437 break;
2438 case LCK_RW_ASSERT_NOTHELD:
2439 if ((lck->lck_rw_shared_count == 0) &&
2440 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2441 (lck->lck_rw_owner == 0)) {
2442 #ifdef DEBUG_RW
2443 assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2444 #endif /* DEBUG_RW */
2445 return;
2446 }
2447 break;
2448 default:
2449 break;
2450 }
2451 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2452 }
2453
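/*
 * Illustrative sketch (not part of the original file): internal helpers
 * commonly use lck_rw_assert() to document and enforce their locking
 * contract.  struct my_object and its fields are hypothetical.
 */
#if 0 /* example only, not compiled */
struct my_object {
	lck_rw_t        lock;
	uint64_t        generation;
};

static void
example_assert_pattern(struct my_object *obj)
{
	/* the caller must already hold obj->lock exclusively */
	lck_rw_assert(&obj->lock, LCK_RW_ASSERT_EXCLUSIVE);
	obj->generation++;
}
#endif
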
2454 /*!
2455 * @function kdp_lck_rw_lock_is_acquired_exclusive
2456 *
2457 * @abstract
2458  * Checks if a rw_lock is held exclusively.
2459 *
2460 * @discussion
2461 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2462 *
2463 * @param lck lock to check
2464 *
2465  * @returns TRUE if the lock is held exclusively
2466 */
2467 boolean_t
2468 kdp_lck_rw_lock_is_acquired_exclusive(
2469 lck_rw_t *lck)
2470 {
2471 if (not_in_kdp) {
2472 panic("panic: rw lock exclusive check done outside of kernel debugger");
2473 }
2474 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2475 }
2476
2477 void
2478 kdp_rwlck_find_owner(
2479 __unused struct waitq *waitq,
2480 event64_t event,
2481 thread_waitinfo_t *waitinfo)
2482 {
2483 lck_rw_t *rwlck = NULL;
2484 switch (waitinfo->wait_type) {
2485 case kThreadWaitKernelRWLockRead:
2486 rwlck = READ_EVENT_TO_RWLOCK(event);
2487 break;
2488 case kThreadWaitKernelRWLockWrite:
2489 case kThreadWaitKernelRWLockUpgrade:
2490 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2491 break;
2492 default:
2493 panic("%s was called with an invalid blocking type", __FUNCTION__);
2494 break;
2495 }
2496 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2497 waitinfo->owner = thread_tid(ctid_get_thread(rwlck->lck_rw_owner));
2498 }
2499
2500 /*!
2501 * @function lck_rw_lock_would_yield_shared
2502 *
2503 * @abstract
2504 * Check whether a rw_lock currently held in shared mode would be yielded
2505 *
2506 * @discussion
2507 * This function can be used when lck_rw_lock_yield_shared() would be
2508 * inappropriate due to the need to perform additional housekeeping
2509 * prior to any yield or when the caller may wish to prematurely terminate
2510 * an operation rather than resume it after regaining the lock.
2511 *
2512 * @param lck rw_lock already held in shared mode to yield.
2513 *
2514 * @returns TRUE if the lock would yield, FALSE otherwise
2515 */
2516 bool
2517 lck_rw_lock_would_yield_shared(
2518 lck_rw_t *lck)
2519 {
2520 lck_rw_word_t word;
2521
2522 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2523
2524 word.data = ordered_load_rw(lck);
2525 if (word.want_excl || word.want_upgrade) {
2526 return true;
2527 }
2528
2529 return false;
2530 }
2531
2532 /*!
2533 * @function lck_rw_lock_yield_shared
2534 *
2535 * @abstract
2536 * Yields a rw_lock held in shared mode.
2537 *
2538 * @discussion
2539 * This function can block.
2540 * Yields the lock in case there are writers waiting.
2541 * The yield will unlock, block, and re-lock the lock in shared mode.
2542 *
2543 * @param lck rw_lock already held in shared mode to yield.
2544 * @param force_yield if set to true it will always yield irrespective of the lock status
2545 *
2546  * @returns TRUE if the lock was yielded, FALSE otherwise
2547 */
2548 bool
2549 lck_rw_lock_yield_shared(
2550 lck_rw_t *lck,
2551 boolean_t force_yield)
2552 {
2553 if (lck_rw_lock_would_yield_shared(lck) || force_yield) {
2554 lck_rw_unlock_shared(lck);
2555 mutex_pause(2);
2556 lck_rw_lock_shared(lck);
2557 return true;
2558 }
2559
2560 return false;
2561 }
2562
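/*
 * Illustrative sketch (not part of the original file): a long scan under a
 * shared hold can periodically yield to waiting writers.  Because the yield
 * drops and re-takes the lock, the scan must be able to re-validate its
 * position afterwards.  scan_next()/scan_revalidate() are hypothetical
 * helpers.
 */
#if 0 /* example only, not compiled */
static void
example_yield_shared_pattern(lck_rw_t *lck)
{
	lck_rw_lock_shared(lck);
	while (scan_next()) {
		if (lck_rw_lock_yield_shared(lck, FALSE)) {
			/* the lock was dropped and re-acquired */
			scan_revalidate();
		}
	}
	lck_rw_unlock_shared(lck);
}
#endif
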
2563 /*!
2564 * @function lck_rw_lock_would_yield_exclusive
2565 *
2566 * @abstract
2567 * Check whether a rw_lock currently held in exclusive mode would be yielded
2568 *
2569 * @discussion
2570 * This function can be used when lck_rw_lock_yield_exclusive would be
2571 * inappropriate due to the need to perform additional housekeeping
2572 * prior to any yield or when the caller may wish to prematurely terminate
2573 * an operation rather than resume it after regaining the lock.
2574 *
2575 * @param lck rw_lock already held in exclusive mode to yield.
2576 * @param mode when to yield.
2577 *
2578 * @returns TRUE if the lock would yield, FALSE otherwise
2579 */
2580 bool
2581 lck_rw_lock_would_yield_exclusive(
2582 lck_rw_t *lck,
2583 lck_rw_yield_t mode)
2584 {
2585 lck_rw_word_t word;
2586 bool yield = false;
2587
2588 lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);
2589
2590 if (mode == LCK_RW_YIELD_ALWAYS) {
2591 yield = true;
2592 } else {
2593 word.data = ordered_load_rw(lck);
2594 if (word.w_waiting) {
2595 yield = true;
2596 } else if (mode == LCK_RW_YIELD_ANY_WAITER) {
2597 yield = (word.r_waiting != 0);
2598 }
2599 }
2600
2601 return yield;
2602 }
2603
2604 /*!
2605 * @function lck_rw_lock_yield_exclusive
2606 *
2607 * @abstract
2608 * Yields a rw_lock held in exclusive mode.
2609 *
2610 * @discussion
2611 * This function can block.
2612  * Yields the lock in case there are waiters, as selected by mode.
2613 * The yield will unlock, block, and re-lock the lock in exclusive mode.
2614 *
2615 * @param lck rw_lock already held in exclusive mode to yield.
2616 * @param mode when to yield.
2617 *
2618  * @returns TRUE if the lock was yielded, FALSE otherwise
2619 */
2620 bool
2621 lck_rw_lock_yield_exclusive(
2622 lck_rw_t *lck,
2623 lck_rw_yield_t mode)
2624 {
2625 bool yield = lck_rw_lock_would_yield_exclusive(lck, mode);
2626
2627 if (yield) {
2628 lck_rw_unlock_exclusive(lck);
2629 mutex_pause(2);
2630 lck_rw_lock_exclusive(lck);
2631 }
2632
2633 return yield;
2634 }
2635
2636 /*!
2637 * @function lck_rw_sleep
2638 *
2639 * @abstract
2640 * Assert_wait on an event while holding the rw_lock.
2641 *
2642 * @discussion
2643 * the flags can decide how to re-acquire the lock upon wake up
2644 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2645 * and if the priority needs to be kept boosted until the lock is
2646 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2647 *
2648 * @param lck rw_lock to use to synch the assert_wait.
2649 * @param lck_sleep_action flags.
2650 * @param event event to assert_wait on.
2651 * @param interruptible wait type.
2652 */
2653 wait_result_t
2654 lck_rw_sleep(
2655 lck_rw_t *lck,
2656 lck_sleep_action_t lck_sleep_action,
2657 event_t event,
2658 wait_interrupt_t interruptible)
2659 {
2660 wait_result_t res;
2661 lck_rw_type_t lck_rw_type;
2662 thread_pri_floor_t token;
2663
2664 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2665 panic("Invalid lock sleep action %x", lck_sleep_action);
2666 }
2667
2668 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2669 /*
2670 * Although we are dropping the RW lock, the intent in most cases
2671 * is that this thread remains as an observer, since it may hold
2672 * some secondary resource, but must yield to avoid deadlock. In
2673 * this situation, make sure that the thread is boosted to the
2674 * ceiling while blocked, so that it can re-acquire the
2675 * RW lock at that priority.
2676 */
2677 token = thread_priority_floor_start();
2678 }
2679
2680 res = assert_wait(event, interruptible);
2681 if (res == THREAD_WAITING) {
2682 lck_rw_type = lck_rw_done(lck);
2683 res = thread_block(THREAD_CONTINUE_NULL);
2684 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2685 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2686 lck_rw_lock(lck, lck_rw_type);
2687 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2688 lck_rw_lock_exclusive(lck);
2689 } else {
2690 lck_rw_lock_shared(lck);
2691 }
2692 }
2693 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2694 (void)lck_rw_done(lck);
2695 }
2696
2697 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2698 thread_priority_floor_end(&token);
2699 }
2700
2701 return res;
2702 }
2703
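/*
 * Illustrative sketch (not part of the original file): waiting for a
 * condition protected by the rw_lock.  lck_rw_sleep() drops the lock across
 * the block and, with LCK_SLEEP_DEFAULT, re-acquires it in the mode it was
 * held.  The flag and its use as a wait event are hypothetical.
 */
#if 0 /* example only, not compiled */
static void
example_sleep_pattern(lck_rw_t *lck, int *flag)
{
	lck_rw_lock_exclusive(lck);
	while (*flag == 0) {
		/* drops the lock, blocks, then re-takes it exclusive */
		(void)lck_rw_sleep(lck, LCK_SLEEP_DEFAULT, (event_t)flag, THREAD_UNINT);
	}
	*flag = 0;
	lck_rw_unlock_exclusive(lck);
}
#endif
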
2704 /*!
2705 * @function lck_rw_sleep_deadline
2706 *
2707 * @abstract
2708 * Assert_wait_deadline on an event while holding the rw_lock.
2709 *
2710 * @discussion
2711 * the flags can decide how to re-acquire the lock upon wake up
2712 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2713 * and if the priority needs to be kept boosted until the lock is
2714 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2715 *
2716 * @param lck rw_lock to use to synch the assert_wait.
2717 * @param lck_sleep_action flags.
2718 * @param event event to assert_wait on.
2719 * @param interruptible wait type.
2720  * @param deadline maximum time after which the thread is woken up.
2721 */
2722 wait_result_t
2723 lck_rw_sleep_deadline(
2724 lck_rw_t *lck,
2725 lck_sleep_action_t lck_sleep_action,
2726 event_t event,
2727 wait_interrupt_t interruptible,
2728 uint64_t deadline)
2729 {
2730 wait_result_t res;
2731 lck_rw_type_t lck_rw_type;
2732 thread_pri_floor_t token;
2733
2734 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2735 panic("Invalid lock sleep action %x", lck_sleep_action);
2736 }
2737
2738 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2739 token = thread_priority_floor_start();
2740 }
2741
2742 res = assert_wait_deadline(event, interruptible, deadline);
2743 if (res == THREAD_WAITING) {
2744 lck_rw_type = lck_rw_done(lck);
2745 res = thread_block(THREAD_CONTINUE_NULL);
2746 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2747 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2748 lck_rw_lock(lck, lck_rw_type);
2749 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2750 lck_rw_lock_exclusive(lck);
2751 } else {
2752 lck_rw_lock_shared(lck);
2753 }
2754 }
2755 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2756 (void)lck_rw_done(lck);
2757 }
2758
2759 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2760 thread_priority_floor_end(&token);
2761 }
2762
2763 return res;
2764 }
2765
2766 /*
2767 * Reader-writer lock promotion
2768 *
2769 * We support a limited form of reader-writer
2770 * lock promotion whose effects are:
2771 *
2772 * * Qualifying threads have decay disabled
2773 * * Scheduler priority is reset to a floor of
2774  * their statically assigned priority
2775 * or MINPRI_RWLOCK
2776 *
2777 * The rationale is that lck_rw_ts do not have
2778 * a single owner, so we cannot apply a directed
2779 * priority boost from all waiting threads
2780 * to all holding threads without maintaining
2781 * lists of all shared owners and all waiting
2782 * threads for every lock.
2783 *
2784 * Instead (and to preserve the uncontended fast-
2785 * path), acquiring (or attempting to acquire)
2786  * a RW lock in shared or exclusive mode increments
2787 * a per-thread counter. Only if that thread stops
2788 * making forward progress (for instance blocking
2789 * on a mutex, or being preempted) do we consult
2790 * the counter and apply the priority floor.
2791 * When the thread becomes runnable again (or in
2792 * the case of preemption it never stopped being
2793 * runnable), it has the priority boost and should
2794 * be in a good position to run on the CPU and
2795 * release all RW locks (at which point the priority
2796 * boost is cleared).
2797 *
2798 * Care must be taken to ensure that priority
2799 * boosts are not retained indefinitely, since unlike
2800 * mutex priority boosts (where the boost is tied
2801 * to the mutex lifecycle), the boost is tied
2802 * to the thread and independent of any particular
2803 * lck_rw_t. Assertions are in place on return
2804 * to userspace so that the boost is not held
2805 * indefinitely.
2806 *
2807 * The routines that increment/decrement the
2808 * per-thread counter should err on the side of
2809 * incrementing any time a preemption is possible
2810 * and the lock would be visible to the rest of the
2811 * system as held (so it should be incremented before
2812 * interlocks are dropped/preemption is enabled, or
2813 * before a CAS is executed to acquire the lock).
2814 *
2815 */
2816
2817 /*!
2818 * @function lck_rw_clear_promotion
2819 *
2820 * @abstract
2821 * Undo priority promotions when the last rw_lock
2822 * is released by a thread (if a promotion was active).
2823 *
2824 * @param thread thread to demote.
2825 * @param lock object reason for the demotion.
2826 */
2827 __attribute__((noinline))
2828 static void
2829 lck_rw_clear_promotion(thread_t thread, const void *lock)
2830 {
2831 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
2832 spl_t s = splsched();
2833 thread_lock(thread);
2834
2835 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
2836 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED,
2837 unslide_for_kdebug(lock));
2838 }
2839
2840 thread_unlock(thread);
2841 splx(s);
2842 }
2843
2844 /*!
2845 * @function lck_rw_set_promotion_locked
2846 *
2847 * @abstract
2848 * Callout from context switch if the thread goes
2849 * off core with a positive rwlock_count.
2850 *
2851 * @discussion
2852 * Called at splsched with the thread locked.
2853 *
2854 * @param thread thread to promote.
2855 */
2856 __attribute__((always_inline))
2857 void
2858 lck_rw_set_promotion_locked(thread_t thread)
2859 {
2860 if (LcksOpts & LCK_OPTION_DISABLE_RW_PRIO) {
2861 return;
2862 }
2863
2864 assert(thread->rwlock_count > 0);
2865
2866 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2867 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
2868 }
2869 }
2870
2871 __attribute__((always_inline))
2872 void
2873 lck_rw_lock_count_inc(thread_t thread, const void *lock __unused)
2874 {
2875 if (thread->rwlock_count++ == 0) {
2876 #if MACH_ASSERT
2877 /*
2878 * Set the ast to check that the
2879 * rwlock_count is going to be set to zero when
2880 * going back to userspace.
2881 * Set it only once when we increment it for the first time.
2882 */
2883 act_set_debug_assert();
2884 #endif
2885 }
2886 }
2887
2888 __abortlike
2889 static void
2890 __lck_rw_lock_count_dec_panic(thread_t thread)
2891 {
2892 panic("rw lock count underflow for thread %p", thread);
2893 }
2894
2895 __attribute__((always_inline))
2896 void
2897 lck_rw_lock_count_dec(thread_t thread, const void *lock)
2898 {
2899 uint32_t rwlock_count = thread->rwlock_count--;
2900
2901 if (rwlock_count == 0) {
2902 __lck_rw_lock_count_dec_panic(thread);
2903 }
2904
2905 if (__probable(rwlock_count == 1)) {
2906 /* sched_flags checked without lock, but will be rechecked while clearing */
2907 if (__improbable(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2908 lck_rw_clear_promotion(thread, lock);
2909 }
2910 }
2911 }
2912