/*
 * Copyright (c) 2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
#include <debug.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);

#define LCK_RW_WRITER_EVENT(lck)        (event_t)((uintptr_t)(lck)+1)
#define LCK_RW_READER_EVENT(lck)        (event_t)((uintptr_t)(lck)+2)
#define WRITE_EVENT_TO_RWLOCK(event)    ((lck_rw_t *)((uintptr_t)(event)-1))
#define READ_EVENT_TO_RWLOCK(event)     ((lck_rw_t *)((uintptr_t)(event)-2))

#if CONFIG_DTRACE
#define DTRACE_RW_SHARED        0x0     //reader
#define DTRACE_RW_EXCL          0x1     //writer
#define DTRACE_NO_FLAG          0x0     //not applicable
#endif /* CONFIG_DTRACE */

#define LCK_RW_LCK_EXCLUSIVE_CODE       0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE      0x101
#define LCK_RW_LCK_SHARED_CODE          0x102
#define LCK_RW_LCK_SH_TO_EX_CODE        0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE       0x104
#define LCK_RW_LCK_EX_TO_SH_CODE        0x105

#if __x86_64__
#define LCK_RW_LCK_EX_WRITER_SPIN_CODE  0x106
#define LCK_RW_LCK_EX_WRITER_WAIT_CODE  0x107
#define LCK_RW_LCK_EX_READER_SPIN_CODE  0x108
#define LCK_RW_LCK_EX_READER_WAIT_CODE  0x109
#define LCK_RW_LCK_SHARED_SPIN_CODE     0x110
#define LCK_RW_LCK_SHARED_WAIT_CODE     0x111
#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE   0x112
#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE   0x113
#endif

#define lck_rw_ilk_lock(lock)   hw_lock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)

#define ordered_load_rw(lock)                   os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
#define ordered_store_rw(lock, value)           os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
#define ordered_load_rw_owner(lock)             os_atomic_load(&(lock)->lck_rw_owner, compiler_acq_rel)
#define ordered_store_rw_owner(lock, value)     os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)

#ifdef DEBUG_RW
static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
    VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
#define rw_lock_debug_disabled() ((LcksOpts & disLkRWDebug) == disLkRWDebug)

#define set_rwlde_caller_packed(entry, caller)  ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)caller, LCK_RW_CALLER_PACKED))
#define get_rwlde_caller(entry)                 ((void*)VM_UNPACK_POINTER(entry->rwlde_caller_packed, LCK_RW_CALLER_PACKED))

#endif /* DEBUG_RW */

/*!
 * @function lck_rw_alloc_init
 *
 * @abstract
 * Allocates and initializes a rw_lock_t.
 *
 * @discussion
 * The function can block. See lck_rw_init() for initialization details.
 *
 * @param grp lock group to associate with the lock.
 * @param attr lock attribute to initialize the lock.
 *
 * @returns NULL or the allocated lock
 */
lck_rw_t *
lck_rw_alloc_init(
	lck_grp_t       *grp,
	lck_attr_t      *attr)
{
	lck_rw_t        *lck;

	lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
	lck_rw_init(lck, grp, attr);
	return lck;
}
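
/*
 * Illustrative usage sketch (not part of this file's implementation):
 * allocating a rw_lock from a lock group and releasing it once it is no
 * longer needed. The group name and variables below are hypothetical.
 *
 *	static lck_grp_t *my_grp;
 *	static lck_rw_t  *my_lock;
 *
 *	my_grp  = lck_grp_alloc_init("com.example.mylocks", LCK_GRP_ATTR_NULL);
 *	my_lock = lck_rw_alloc_init(my_grp, LCK_ATTR_NULL);
 *	...
 *	lck_rw_free(my_lock, my_grp);
 */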

/*!
 * @function lck_rw_init
 *
 * @abstract
 * Initializes a rw_lock_t.
 *
 * @discussion
 * Usage statistics for the lock are going to be added to the lock group provided.
 *
 * The lock attribute can be used to specify the lock contention behaviour.
 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to RW_WRITER_PRIORITY)
 * and lck_attr_rw_shared_priority() can be used to set the behaviour to RW_SHARED_PRIORITY.
 *
 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the readers;
 * if the lock is held and a writer starts waiting for the lock, readers will not be able
 * to acquire the lock until all writers stop contending. Readers could
 * potentially starve.
 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the writers:
 * unless the lock is held in exclusive mode, readers will always be able to acquire the lock.
 * Readers can lock a shared lock even if there are writers waiting. Writers could potentially
 * starve.
 *
 * @param lck lock to initialize.
 * @param grp lock group to associate with the lock.
 * @param attr lock attribute to initialize the lock.
 *
 */
void
lck_rw_init(
	lck_rw_t        *lck,
	lck_grp_t       *grp,
	lck_attr_t      *attr)
{
	if (attr == LCK_ATTR_NULL) {
		attr = &LockDefaultLckAttr;
	}
	memset(lck, 0, sizeof(lck_rw_t));
	lck->lck_rw_can_sleep = TRUE;
	if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) {
		lck->lck_rw_priv_excl = TRUE;
	}

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}
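
/*
 * Illustrative sketch of choosing a contention policy at init time (the
 * group, attribute and lock variables are hypothetical). LCK_ATTR_NULL
 * yields the default RW_WRITER_PRIORITY behaviour, while
 * lck_attr_rw_shared_priority() switches an allocated attribute to
 * RW_SHARED_PRIORITY.
 *
 *	lck_attr_t *attr = lck_attr_alloc_init();
 *	lck_attr_rw_shared_priority(attr);       // readers never wait behind writers
 *	lck_rw_init(&my_reader_biased_lock, my_grp, attr);
 *
 *	lck_rw_init(&my_default_lock, my_grp, LCK_ATTR_NULL);   // writer priority
 */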

/*!
 * @function lck_rw_free
 *
 * @abstract
 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
 *
 * @discussion
 * The lock must not be held by any thread.
 *
 * @param lck rw_lock to free.
 */
void
lck_rw_free(
	lck_rw_t        *lck,
	lck_grp_t       *grp)
{
	lck_rw_destroy(lck, grp);
	zfree(KT_LCK_RW, lck);
}

/*!
 * @function lck_rw_destroy
 *
 * @abstract
 * Destroys a rw_lock previously initialized with lck_rw_init().
 *
 * @discussion
 * The lock must not be held by any thread.
 *
 * @param lck rw_lock to destroy.
 */
void
lck_rw_destroy(
	lck_rw_t        *lck,
	lck_grp_t       *grp)
{
	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
		panic("Destroying previously destroyed lock %p", lck);
	}
	lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);

	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	lck_grp_deallocate(grp);
	return;
}

#ifdef DEBUG_RW

/*
 * Best effort mechanism to debug rw_locks.
 *
 * This mechanism is in addition to the owner checks. The owner is set
 * only when the lock is held in exclusive mode so the checks do not cover
 * the cases in which the lock is held in shared mode.
 *
 * This mechanism tentatively stores the rw_lock acquired and its debug
 * information on the thread struct.
 * Just up to LCK_RW_EXPECTED_MAX_NUMBER rw lock debug information can be stored.
 *
 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
 * at the same time. If a thread holds more than this number of rw_locks we
 * will start losing debug information.
 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability we will
 * store the debug information but it will require more memory per thread
 * and longer lock/unlock time.
 *
 * If an empty slot is found for the debug information, we record the lock
 * otherwise we set the overflow threshold flag.
 *
 * If we reached the overflow threshold we might stop asserting because we cannot be sure
 * anymore if the lock was acquired or not.
 *
 * Even if we reached the overflow threshold, we try to store the debug information
 * for the new locks acquired. This can be useful in core dumps to debug
 * possible return to userspace without unlocking and to find possible readers
 * holding the lock.
 */
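
/*
 * Illustrative sketch of how the per-thread bookkeeping above evolves for a
 * thread taking two hypothetical locks A and B, the second one recursively
 * in shared mode. Field names follow the rwlde_*/rwld_* identifiers used
 * below; the actual struct layout lives with the thread definition, and the
 * unlock entry points are assumed to be the usual lck_rw_unlock_* ones.
 *
 *	lck_rw_lock_exclusive(A);    // slot 0: { lock=A, mode_count=-1 }, saved=1
 *	lck_rw_lock_shared(B);       // slot 1: { lock=B, mode_count=+1 }, saved=2
 *	lck_rw_lock_shared(B);       // slot 1: mode_count=+2 (recursive shared)
 *	lck_rw_unlock_shared(B);     // slot 1: mode_count=+1
 *	lck_rw_unlock_shared(B);     // slot 1 cleared, saved=1
 *	lck_rw_unlock_exclusive(A);  // slot 0 cleared, saved=0
 *
 * A negative mode_count marks an exclusive hold; a positive value counts
 * shared holds. Once more than LCK_RW_EXPECTED_MAX_NUMBER locks are held at
 * once, rwld_overflow is set and the asserts become best effort only.
 */
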
void
rw_lock_init(void)
{
	if (kern_feature_override(KF_RW_LOCK_DEBUG_OVRD)) {
		LcksOpts |= disLkRWDebug;
	}
}

static inline struct rw_lock_debug_entry *
find_lock_in_savedlocks(lck_rw_t* lock, rw_lock_debug_t *rw_locks_held)
{
	int i;
	for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
		struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
		if (existing->rwlde_lock == lock) {
			return existing;
		}
	}

	return NULL;
}

__abortlike
static void
rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
{
	panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
}

static inline struct rw_lock_debug_entry *
find_empty_slot(rw_lock_debug_t *rw_locks_held)
{
	int i;
	for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
		struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
		if (entry->rwlde_lock == NULL) {
			return entry;
		}
	}
	rwlock_slot_panic(rw_locks_held);
}

__abortlike
static void
canlock_rwlock_panic(lck_rw_t* lock, thread_t thread, struct rw_lock_debug_entry *entry)
{
	panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
	    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
	    ordered_load_rw(lock), ordered_load_rw_owner(lock));
}

static inline void
assert_canlock_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled() || (rw_locks_held->rwld_locks_acquired == 0))) {
		//no locks saved, safe to lock
		return;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__improbable(entry != NULL)) {
		boolean_t can_be_shared_recursive;
		if (lck_rw_recursive_shared_assert_74048094) {
			can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
		} else {
			/*
			 * rw_lock_shared is currently called recursively;
			 * until that code is fixed, allow recursive
			 * locking in shared mode.
			 */
			can_be_shared_recursive = TRUE;
		}
		if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
			return;
		}
		canlock_rwlock_panic(lock, thread, entry);
	}
}

__abortlike
static void
held_rwlock_notheld_panic(lck_rw_t* lock, thread_t thread)
{
	panic("RW lock %p not held by %p", lock, thread);
}

__abortlike
static void
held_rwlock_notheld_with_info_panic(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
{
	if (type == LCK_RW_TYPE_EXCLUSIVE) {
		panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
		    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
		    ordered_load_rw(lock), ordered_load_rw_owner(lock));
	} else {
		panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
		    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
		    ordered_load_rw(lock), ordered_load_rw_owner(lock));
	}
}

static inline void
assert_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
		if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
			held_rwlock_notheld_panic(lock, thread);
		}
		return;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__probable(entry != NULL)) {
		if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
			held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
		} else {
			if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
				held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
			}
		}
	} else {
		if (rw_locks_held->rwld_overflow == 0) {
			held_rwlock_notheld_panic(lock, thread);
		}
	}
}

static inline void
change_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
		if (rw_locks_held->rwld_overflow == 0) {
			held_rwlock_notheld_panic(lock, thread);
		}
		return;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__probable(entry != NULL)) {
		if (typeFrom == LCK_RW_TYPE_SHARED) {
			//We are upgrading
			assertf(entry->rwlde_mode_count == 1,
			    "RW lock %p not held by a single shared when upgrading "
			    "by %p caller %p read %d state 0x%x owner 0x%p ",
			    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
			    ordered_load_rw(lock), ordered_load_rw_owner(lock));
			entry->rwlde_mode_count = -1;
			set_rwlde_caller_packed(entry, caller);
		} else {
			//We are downgrading
			assertf(entry->rwlde_mode_count == -1,
			    "RW lock %p not held in write mode when downgrading "
			    "by %p caller %p read %d state 0x%x owner 0x%p ",
			    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
			    ordered_load_rw(lock), ordered_load_rw_owner(lock));
			entry->rwlde_mode_count = 1;
			set_rwlde_caller_packed(entry, caller);
		}
		return;
	}

	if (rw_locks_held->rwld_overflow == 0) {
		held_rwlock_notheld_panic(lock, thread);
	}

	if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
		//array is full
		return;
	}

	struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
	null_entry->rwlde_lock = lock;
	set_rwlde_caller_packed(null_entry, caller);
	if (typeFrom == LCK_RW_TYPE_SHARED) {
		null_entry->rwlde_mode_count = -1;
	} else {
		null_entry->rwlde_mode_count = 1;
	}
	rw_locks_held->rwld_locks_saved++;
}

__abortlike
static void
add_held_rwlock_too_many_panic(thread_t thread)
{
	panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
}

static inline void
add_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
	struct rw_lock_debug_entry *null_entry;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
		add_held_rwlock_too_many_panic(thread);
	}
	rw_locks_held->rwld_locks_acquired++;

	if (type == LCK_RW_TYPE_EXCLUSIVE) {
		if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
			//array is full
			rw_locks_held->rwld_overflow = 1;
			return;
		}
		null_entry = find_empty_slot(rw_locks_held);
		null_entry->rwlde_lock = lock;
		set_rwlde_caller_packed(null_entry, caller);
		null_entry->rwlde_mode_count = -1;
		rw_locks_held->rwld_locks_saved++;
		return;
	} else {
		if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
			//array is empty
			goto add_shared;
		}

		boolean_t allow_shared_recursive;
		if (lck_rw_recursive_shared_assert_74048094) {
			allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
		} else {
			allow_shared_recursive = TRUE;
		}
		if (allow_shared_recursive) {
			//It could be already locked in shared mode
			struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
			if (entry != NULL) {
				assert(entry->rwlde_mode_count > 0);
				assertf(entry->rwlde_mode_count != INT8_MAX,
				    "RW lock %p with too many recursive shared held "
				    "from %p caller %p read %d state 0x%x owner 0x%p",
				    lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
				    ordered_load_rw(lock), ordered_load_rw_owner(lock));
				entry->rwlde_mode_count += 1;
				return;
			}
		}

		//none of the locks were a match
		//try to add a new entry
		if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
			//array is full
			rw_locks_held->rwld_overflow = 1;
			return;
		}

add_shared:
		null_entry = find_empty_slot(rw_locks_held);
		null_entry->rwlde_lock = lock;
		set_rwlde_caller_packed(null_entry, caller);
		null_entry->rwlde_mode_count = 1;
		rw_locks_held->rwld_locks_saved++;
	}
}

static inline void
remove_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
{
	rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;

	if (__probable(rw_lock_debug_disabled())) {
		return;
	}

	if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
		return;
	}
	rw_locks_held->rwld_locks_acquired--;

	if (rw_locks_held->rwld_locks_saved == 0) {
		assert(rw_locks_held->rwld_overflow == 1);
		goto out;
	}

	struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
	if (__probable(entry != NULL)) {
		if (type == LCK_RW_TYPE_EXCLUSIVE) {
			assert(entry->rwlde_mode_count == -1);
			entry->rwlde_mode_count = 0;
		} else {
			assert(entry->rwlde_mode_count > 0);
			entry->rwlde_mode_count--;
			if (entry->rwlde_mode_count > 0) {
				goto out;
			}
		}
		entry->rwlde_caller_packed = 0;
		entry->rwlde_lock = NULL;
		rw_locks_held->rwld_locks_saved--;
	} else {
		assert(rw_locks_held->rwld_overflow == 1);
	}

out:
	if (rw_locks_held->rwld_locks_acquired == 0) {
		rw_locks_held->rwld_overflow = 0;
	}
	return;
}
#endif /* DEBUG_RW */

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(
	lck_rw_t        *lck)
{
	boolean_t       istate;

	istate = ml_set_interrupts_enabled(FALSE);
	lck_rw_ilk_lock(lck);
	return istate;
}

static inline void
lck_interlock_unlock(
	lck_rw_t        *lck,
	boolean_t       istate)
{
	lck_rw_ilk_unlock(lck);
	ml_set_interrupts_enabled(istate);
}

static inline void
lck_rw_inc_thread_count(
	thread_t        thread)
{
	__assert_only uint32_t prev_rwlock_count;

	prev_rwlock_count = thread->rwlock_count++;
#if MACH_ASSERT
	/*
	 * Set the ast to check that the
	 * rwlock_count is going to be set to zero when
	 * going back to userspace.
	 * Set it only once when we increment it for the first time.
	 */
	if (prev_rwlock_count == 0) {
		act_set_debug_assert();
	}
#endif
}

/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
static inline uint64_t
lck_rw_deadline_for_spin(
	lck_rw_t        *lck)
{
	lck_rw_word_t   word;

	word.data = ordered_load_rw(lck);
	if (word.can_sleep) {
		if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up so we will not bother spinning at this time...
			 * or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return mach_absolute_time();
		}
		return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
	} else {
		return mach_absolute_time() + (100000LL * 1000000000LL);
	}
}

/*
 * This inline is used when busy-waiting for an rw lock.
 * If interrupts were disabled when the lock primitive was called,
 * we poll the IPI handler for pending tlb flushes in x86.
 */
static inline void
lck_rw_lock_pause(
	boolean_t       interrupts_enabled)
{
#if X86_64
	if (!interrupts_enabled) {
		handle_pending_TLB_flushes();
	}
	cpu_pause();
#else
	(void) interrupts_enabled;
	wait_for_event();
#endif
}

static boolean_t
lck_rw_drain_status(
	lck_rw_t        *lock,
	uint32_t        status_mask,
	boolean_t       wait)
{
	uint64_t        deadline = 0;
	uint32_t        data;
	boolean_t       istate = FALSE;

	if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
#if __x86_64__
		istate = ml_get_interrupts_enabled();
#endif
	}

	for (;;) {
#if __x86_64__
		data = os_atomic_load(&lock->lck_rw_data, relaxed);
#else
		data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
#endif
		if ((data & status_mask) == 0) {
			break;
		}
		if (wait) {
			lck_rw_lock_pause(istate);
		} else {
			atomic_exchange_abort();
		}
		if (!wait || (mach_absolute_time() >= deadline)) {
			return FALSE;
		}
	}
	atomic_exchange_abort();
	return TRUE;
}

/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(
	lck_rw_t        *lock)
{
	uint32_t        data, prev;

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
		if (data & LCK_RW_INTERLOCK) {
#if __x86_64__
			cpu_pause();
#else
			wait_for_event();
#endif
		} else {
			atomic_exchange_abort();
			return;
		}
	}
}

#define LCK_RW_GRAB_WANT        0
#define LCK_RW_GRAB_SHARED      1

static boolean_t
lck_rw_grab(
	lck_rw_t        *lock,
	int             mode,
	boolean_t       wait)
{
	uint64_t        deadline = 0;
	uint32_t        data, prev;
	boolean_t       do_exch, istate = FALSE;

	if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
#if __x86_64__
		istate = ml_get_interrupts_enabled();
#endif
	}

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		do_exch = FALSE;
		if (mode == LCK_RW_GRAB_WANT) {
			if ((data & LCK_RW_WANT_EXCL) == 0) {
				data |= LCK_RW_WANT_EXCL;
				do_exch = TRUE;
			}
		} else { // LCK_RW_GRAB_SHARED
			if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
			    (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
				data += LCK_RW_SHARED_READER;
				do_exch = TRUE;
			}
		}
		if (do_exch) {
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return TRUE;
			}
		} else {
			if (wait) {
				lck_rw_lock_pause(istate);
			} else {
				atomic_exchange_abort();
			}
			if (!wait || (mach_absolute_time() >= deadline)) {
				return FALSE;
			}
		}
	}
}

static void
lck_rw_lock_exclusive_gen(
	lck_rw_t        *lock)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	lck_rw_word_t   word;
	int             slept = 0;
	boolean_t       gotlock = 0;
	boolean_t       not_shared_or_upgrade = 0;
	wait_result_t   res = 0;
	boolean_t       istate;

#if CONFIG_DTRACE
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
#endif

	__assert_only thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner != current_thread(), "Lock already held state=0x%x, owner=%p",
	    ordered_load_rw(lock), owner);

#ifdef DEBUG_RW
	/*
	 * Best effort attempt to check that this thread
	 * is not already holding the lock (this checks read mode too).
	 */
	assert_canlock_rwlock(lock, current_thread(), LCK_RW_TYPE_EXCLUSIVE);
#endif /* DEBUG_RW */

	/*
	 * Try to acquire the lck_rw_want_excl bit.
	 */
	while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {
#if CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lock->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

		gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);

		if (gotlock) {
			break;
		}
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		word.data = ordered_load_rw(lock);
		if (word.can_sleep) {
			istate = lck_interlock_lock(lock);
			word.data = ordered_load_rw(lock);

			if (word.want_excl) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock),
				    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);
				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
			} else {
				word.want_excl = 1;
				ordered_store_rw(lock, word.data);
				lck_interlock_unlock(lock, istate);
				break;
			}
		}
	}
	/*
	 * Wait for readers (and upgrades) to finish...
	 */
	while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
#if CONFIG_DTRACE
		/*
		 * Either sleeping or spinning is happening, start
		 * a timing of our delay interval now. If we set it
		 * to -1 we don't have accurate data so we cannot later
		 * decide to record a dtrace spin or sleep event.
		 */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lock->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

		not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);

		if (not_shared_or_upgrade) {
			break;
		}
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		word.data = ordered_load_rw(lock);
		if (word.can_sleep) {
			istate = lck_interlock_lock(lock);
			word.data = ordered_load_rw(lock);

			if (word.shared_count != 0 || word.want_upgrade) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock),
				    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lock, istate);
				/*
				 * must own the lock now, since we checked for
				 * readers or upgrade owner behind the interlock
				 * no need for a call to 'lck_rw_drain_status'
				 */
				break;
			}
		}
	}

#if CONFIG_DTRACE
	/*
	 * Decide what latencies we suffered that are Dtrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If we have set wait_interval to -1, then dtrace was not enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record if when we blocked
			 * it was held for read or write, and how many readers.
			 * Notice that above we recorded this before we dropped
			 * the interlock so the count is accurate.
			 */
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
#endif /* CONFIG_DTRACE */
}

#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
	    (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
	    LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
/*!
 * @function lck_rw_lock_exclusive_check_contended
 *
 * @abstract
 * Locks a rw_lock in exclusive mode.
 *
 * @discussion
 * This routine IS EXPERIMENTAL.
 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
 *
 * @param lock rw_lock to lock.
 *
 * @returns Returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
 *          otherwise.
 */
bool
lck_rw_lock_exclusive_check_contended(
	lck_rw_t        *lock)
{
	thread_t        thread = current_thread();
	bool            contended = false;

	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
	} else {
		contended = true;
		lck_rw_lock_exclusive_gen(lock);
	}
	__assert_only thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

	ordered_store_rw_owner(lock, thread);

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
#endif /* DEBUG_RW */
	return contended;
}

__attribute__((always_inline))
static void
lck_rw_lock_exclusive_internal_inline(
	lck_rw_t        *lock,
	void            *caller)
{
#pragma unused(caller)
	thread_t        thread = current_thread();

	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
#if CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif /* CONFIG_DTRACE */
	} else {
		lck_rw_lock_exclusive_gen(lock);
	}

	__assert_only thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

	ordered_store_rw_owner(lock, thread);

#if DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
#endif /* DEBUG_RW */
}

__attribute__((noinline))
static void
lck_rw_lock_exclusive_internal(
	lck_rw_t        *lock,
	void            *caller)
{
	lck_rw_lock_exclusive_internal_inline(lock, caller);
}

/*!
 * @function lck_rw_lock_exclusive
 *
 * @abstract
 * Locks a rw_lock in exclusive mode.
 *
 * @discussion
 * This function can block.
 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
 * can acquire it in exclusive mode.
 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
 *
 * @param lock rw_lock to lock.
 */
void
lck_rw_lock_exclusive(
	lck_rw_t        *lock)
{
	lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
}
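
/*
 * Illustrative usage sketch (hypothetical data structure and lock names),
 * assuming the matching lck_rw_unlock_exclusive() counterpart declared in
 * kern/locks.h:
 *
 *	lck_rw_lock_exclusive(&obj->obj_lock);
 *	obj->field = new_value;                  // exclusive writers only
 *	lck_rw_unlock_exclusive(&obj->obj_lock);
 */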

/*
 * Routine:	lck_rw_lock_shared_gen
 * Function:
 *	Fast path code has determined that this lock
 *	is held exclusively... this is where we spin/block
 *	until we can acquire the lock in the shared mode
 */
static void
lck_rw_lock_shared_gen(
	lck_rw_t        *lck)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	lck_rw_word_t   word;
	boolean_t       gotlock = 0;
	int             slept = 0;
	wait_result_t   res = 0;
	boolean_t       istate;

#if CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif /* CONFIG_DTRACE */

	__assert_only thread_t owner = ordered_load_rw_owner(lck);
	assertf(owner != current_thread(), "Lock already held state=0x%x, owner=%p",
	    ordered_load_rw(lck), owner);
#ifdef DEBUG_RW
	/*
	 * Best effort attempt to check that this thread
	 * is not already holding the lock in shared mode.
	 */
	assert_canlock_rwlock(lck, current_thread(), LCK_RW_TYPE_SHARED);
#endif

	while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
#if CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
			dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
		    trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);

		gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
		    trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);

		if (gotlock) {
			break;
		}
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock for read
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {
			istate = lck_interlock_lock(lck);

			word.data = ordered_load_rw(lck);
			if ((word.want_excl || word.want_upgrade) &&
			    ((word.shared_count == 0) || word.priv_excl)) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
				    trace_lck, word.want_excl, word.want_upgrade, 0, 0);

				word.r_waiting = 1;
				ordered_store_rw(lck, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
				res = assert_wait(LCK_RW_READER_EVENT(lck),
				    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
				    trace_lck, res, slept, 0, 0);
			} else {
				word.shared_count++;
				ordered_store_rw(lck, word.data);
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}

#if CONFIG_DTRACE
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif /* CONFIG_DTRACE */
}

__attribute__((always_inline))
static void
lck_rw_lock_shared_internal_inline(
	lck_rw_t        *lock,
	void            *caller)
{
#pragma unused(caller)

	uint32_t        data, prev;
	thread_t        thread = current_thread();
	__assert_only thread_t owner;
#ifdef DEBUG_RW
	boolean_t       check_canlock = TRUE;
#endif

	if (lock->lck_rw_can_sleep) {
		lck_rw_inc_thread_count(thread);
	} else if (get_preemption_level() == 0) {
		panic("Taking non-sleepable RW lock with preemption enabled");
	}

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
			atomic_exchange_abort();
			lck_rw_lock_shared_gen(lock);
			goto locked;
		}
#ifdef DEBUG_RW
		if ((data & LCK_RW_SHARED_MASK) == 0) {
			/*
			 * If the lock is uncontended,
			 * we do not need to check if we can lock it
			 */
			check_canlock = FALSE;
		}
#endif
		data += LCK_RW_SHARED_READER;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
			break;
		}
		cpu_pause();
	}
#ifdef DEBUG_RW
	if (check_canlock) {
		/*
		 * Best effort attempt to check that this thread
		 * is not already holding the lock (this checks read mode too).
		 */
		assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
	}
#endif
locked:
	owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif /* CONFIG_DTRACE */

#ifdef DEBUG_RW
	add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
#endif /* DEBUG_RW */
}

__attribute__((noinline))
static void
lck_rw_lock_shared_internal(
	lck_rw_t        *lock,
	void            *caller)
{
	lck_rw_lock_shared_internal_inline(lock, caller);
}

/*!
 * @function lck_rw_lock_shared
 *
 * @abstract
 * Locks a rw_lock in shared mode.
 *
 * @discussion
 * This function can block.
 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
 * can acquire it in exclusive mode.
 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
 * the lock without waiting.
 * If the lock is held in shared mode and there is at least a writer waiting, a reader will wait
 * for all the writers to make progress if the lock was initialized with the default settings. Instead if
 * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
 * in shared mode.
 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
 *
 * @param lock rw_lock to lock.
 */
void
lck_rw_lock_shared(
	lck_rw_t        *lock)
{
	lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0));
}
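
/*
 * Illustrative usage sketch (hypothetical data structure and lock names),
 * assuming the matching lck_rw_unlock_shared() counterpart declared in
 * kern/locks.h. Any number of readers may hold the lock concurrently:
 *
 *	lck_rw_lock_shared(&obj->obj_lock);
 *	value = obj->field;                      // read-only access
 *	lck_rw_unlock_shared(&obj->obj_lock);
 */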

/*
 * Routine:	lck_rw_lock_shared_to_exclusive_failure
 * Function:
 *	Fast path code has already dropped our read
 *	count and determined that someone else owns 'lck_rw_want_upgrade'
 *	if 'lck_rw_shared_count' == 0, it's also already dropped 'lck_w_waiting'
 *	all we need to do here is determine if a wakeup is needed
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t        *lck,
	uint32_t        prior_lock_state)
{
	thread_t        thread = current_thread();
	uint32_t        rwlock_count;

	if ((prior_lock_state & LCK_RW_W_WAITING) &&
	    ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
		/*
		 * Someone else has requested upgrade.
		 * Since we've released the read lock, wake
		 * him up if he's blocked waiting
		 */
		thread_wakeup(LCK_RW_WRITER_EVENT(lck));
	}

	/* Check if dropping the lock means that we need to unpromote */
	if (lck->lck_rw_can_sleep) {
		rwlock_count = thread->rwlock_count--;
	} else {
		rwlock_count = UINT32_MAX;
	}

	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}

	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
	    VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

#ifdef DEBUG_RW
	remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */

	return FALSE;
}

/*
 * Routine:	lck_rw_lock_shared_to_exclusive_success
 * Function:
 *	the fast path code has already dropped our read
 *	count and successfully acquired 'lck_rw_want_upgrade'
 *	we just need to wait for the rest of the readers to drain
 *	and then we can return as the exclusive holder of this lock
 */
static void
lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t        *lock)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	int             slept = 0;
	lck_rw_word_t   word;
	wait_result_t   res;
	boolean_t       istate;
	boolean_t       not_shared;

#if CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

	while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {
		word.data = ordered_load_rw(lock);
#if CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
			dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start a timing of our delay interval now.
				 */
				readers_at_sleep = word.shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
		    trace_lck, word.shared_count, 0, 0, 0);

		not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
		    trace_lck, lock->lck_rw_shared_count, 0, 0, 0);

		if (not_shared) {
			break;
		}

		/*
		 * if we get here, the spin deadline in lck_rw_drain_status()
		 * has expired w/o the rw_shared_count having drained to 0
		 * check to see if we're allowed to do a thread_block
		 */
		if (word.can_sleep) {
			istate = lck_interlock_lock(lock);

			word.data = ordered_load_rw(lock);
			if (word.shared_count != 0) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
				    trace_lck, word.shared_count, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock),
				    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
				    trace_lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lock, istate);
				break;
			}
		}
	}
#if CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
#endif
}

/*!
 * @function lck_rw_lock_shared_to_exclusive
 *
 * @abstract
 * Upgrades a rw_lock held in shared mode to exclusive.
 *
 * @discussion
 * This function can block.
 * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails, the function
 * returns with the lock not held.
 * The caller needs to hold the lock in shared mode to upgrade it.
 *
 * @param lock rw_lock already held in shared mode to upgrade.
 *
 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
 *          If the function was not able to upgrade the lock, the lock will be dropped
 *          by the function.
 */
boolean_t
lck_rw_lock_shared_to_exclusive(
	lck_rw_t        *lock)
{
	uint32_t        data, prev;

	assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());

#if DEBUG_RW
	thread_t        thread = current_thread();
	assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
#endif /* DEBUG_RW */

	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & LCK_RW_WANT_UPGRADE) {
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0) {         /* we were the last reader */
				data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
			}
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
			}
		} else {
			data |= LCK_RW_WANT_UPGRADE;            /* ask for WANT_UPGRADE */
			data -= LCK_RW_SHARED_READER;           /* and shed our read count */
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				break;
			}
		}
		cpu_pause();
	}
	/* we now own the WANT_UPGRADE */
	if (data & LCK_RW_SHARED_MASK) {                        /* check to see if all of the readers are drained */
		lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
	}
	__assert_only thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);

	ordered_store_rw_owner(lock, current_thread());
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif /* CONFIG_DTRACE */

#if DEBUG_RW
	change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
#endif /* DEBUG_RW */
	return TRUE;
}
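
/*
 * Illustrative upgrade pattern (hypothetical lock name). The failure branch
 * matters: when the upgrade loses the race, the shared hold has already been
 * dropped, so the caller has to reacquire the lock before retrying or
 * re-reading any state examined under the shared hold.
 *
 *	lck_rw_lock_shared(&obj->obj_lock);
 *	if (!lck_rw_lock_shared_to_exclusive(&obj->obj_lock)) {
 *		// upgrade failed: the lock is no longer held at all
 *		lck_rw_lock_exclusive(&obj->obj_lock);
 *		// ...re-validate whatever was read under the shared hold...
 *	}
 *	// held exclusively either way
 *	lck_rw_unlock_exclusive(&obj->obj_lock);
 */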

/*
 * Routine:	lck_rw_lock_exclusive_to_shared_gen
 * Function:
 *	Fast path has already dropped
 *	our exclusive state and bumped lck_rw_shared_count
 *	all we need to do here is determine if anyone
 *	needs to be awakened.
 */
static void
lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t        *lck,
	uint32_t        prior_lock_state,
	void            *caller)
{
#pragma unused(caller)
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	lck_rw_word_t   fake_lck;

	/*
	 * prior_lock state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a pointer to it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck.data = prior_lock_state;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
	    trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);

	/*
	 * don't wake up anyone waiting to take the lock exclusively
	 * since we hold a read count... when the read count drops to 0,
	 * the writers will be woken.
	 *
	 * wake up any waiting readers if we don't have any writers waiting,
	 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
	 */
	if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
		thread_wakeup(LCK_RW_READER_EVENT(lck));
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
	    trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif

#if DEBUG_RW
	thread_t        thread = current_thread();
	change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
#endif /* DEBUG_RW */
}

/*!
 * @function lck_rw_lock_exclusive_to_shared
 *
 * @abstract
 * Downgrades a rw_lock held in exclusive mode to shared.
 *
 * @discussion
 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
 *
 * @param lock rw_lock already held in exclusive mode to downgrade.
 */
void
lck_rw_lock_exclusive_to_shared(
	lck_rw_t        *lock)
{
	uint32_t        data, prev;

	assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
	ordered_store_rw_owner(lock, THREAD_NULL);
	for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
			continue;
		}
		data += LCK_RW_SHARED_READER;
		if (data & LCK_RW_WANT_UPGRADE) {
			data &= ~(LCK_RW_WANT_UPGRADE);
		} else {
			data &= ~(LCK_RW_WANT_EXCL);
		}
		if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
			data &= ~(LCK_RW_W_WAITING);
		}
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
			break;
		}
		cpu_pause();
	}
	lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
}
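
/*
 * Illustrative downgrade pattern (hypothetical lock name): publish an update
 * under the exclusive hold, then keep reading without letting writers in
 * between the two phases. The unlock counterpart is assumed from kern/locks.h.
 *
 *	lck_rw_lock_exclusive(&obj->obj_lock);
 *	obj->field = new_value;
 *	lck_rw_lock_exclusive_to_shared(&obj->obj_lock);
 *	consume(obj->field);                     // still protected, other readers allowed
 *	lck_rw_unlock_shared(&obj->obj_lock);
 */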
1658
1659 /*
1660 * Very sad hack, but the codegen for lck_rw_lock
1661 * is very unhappy with the combination of __builtin_return_address()
1662 * and a noreturn function. For some reason it adds more frames
1663 * than it should. rdar://76570684
1664 */
1665 void
1666 _lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
1667 #pragma clang diagnostic push
1668 #pragma clang diagnostic ignored "-Wmissing-noreturn"
1669 __attribute__((noinline, weak))
1670 void
1671 _lck_rw_lock_type_panic(
1672 lck_rw_t *lck,
1673 lck_rw_type_t lck_rw_type)
1674 {
1675 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
1676 }
1677 #pragma clang diagnostic pop
1678
1679 /*!
1680 * @function lck_rw_lock
1681 *
1682 * @abstract
1683 * Locks a rw_lock with the specified type.
1684 *
1685 * @discussion
1686 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1687 *
1688 * @param lck rw_lock to lock.
1689 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1690 */
1691 void
1692 lck_rw_lock(
1693 lck_rw_t *lck,
1694 lck_rw_type_t lck_rw_type)
1695 {
1696 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1697 return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
1698 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1699 return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
1700 }
1701 _lck_rw_lock_type_panic(lck, lck_rw_type);
1702 }
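
/*
 * Usage sketch (illustrative only): lck_rw_lock() is convenient when the
 * mode is chosen at runtime.  Everything except the lck_rw_* KPI and the
 * LCK_RW_TYPE_* constants is hypothetical.
 *
 *	lck_rw_type_t mode = need_to_modify ? LCK_RW_TYPE_EXCLUSIVE
 *	                                    : LCK_RW_TYPE_SHARED;
 *	lck_rw_lock(&table_lock, mode);
 *	...
 *	lck_rw_unlock(&table_lock, mode);
 */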
1703
1704 __attribute__((always_inline))
1705 static boolean_t
1706 lck_rw_try_lock_shared_internal_inline(
1707 lck_rw_t *lock,
1708 void *caller)
1709 {
1710 #pragma unused(caller)
1711
1712 uint32_t data, prev;
1713 thread_t thread = current_thread();
1714 #ifdef DEBUG_RW
1715 boolean_t check_canlock = TRUE;
1716 #endif
1717
1718 for (;;) {
1719 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1720 if (data & LCK_RW_INTERLOCK) {
1721 atomic_exchange_abort();
1722 lck_rw_interlock_spin(lock);
1723 continue;
1724 }
1725 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1726 atomic_exchange_abort();
1727 return FALSE; /* lock is busy */
1728 }
1729 #ifdef DEBUG_RW
1730 if ((data & LCK_RW_SHARED_MASK) == 0) {
1731 /*
1732 * If the lock is uncontended,
1733 * we do not need to check if we can lock it
1734 */
1735 check_canlock = FALSE;
1736 }
1737 #endif
1738 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1739 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1740 break;
1741 }
1742 cpu_pause();
1743 }
1744 #ifdef DEBUG_RW
1745 if (check_canlock) {
1746 /*
1747 * Best effort attempt to check that this thread
1748 * is not already holding the lock (this checks read mode too).
1749 */
1750 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1751 }
1752 #endif
1753 __assert_only thread_t owner = ordered_load_rw_owner(lock);
1754 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1755
1756 if (lock->lck_rw_can_sleep) {
1757 lck_rw_inc_thread_count(thread);
1758 } else if (get_preemption_level() == 0) {
1759 panic("Taking non-sleepable RW lock with preemption enabled");
1760 }
1761
1762 #if CONFIG_DTRACE
1763 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1764 #endif /* CONFIG_DTRACE */
1765
1766 #ifdef DEBUG_RW
1767 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1768 #endif /* DEBUG_RW */
1769 return TRUE;
1770 }
1771
1772 __attribute__((noinline))
1773 static boolean_t
1774 lck_rw_try_lock_shared_internal(
1775 lck_rw_t *lock,
1776 void *caller)
1777 {
1778 return lck_rw_try_lock_shared_internal_inline(lock, caller);
1779 }
1780
1781 /*!
1782 * @function lck_rw_try_lock_shared
1783 *
1784 * @abstract
1785 * Tries to lock a rw_lock in read (shared) mode.
1786 *
1787 * @discussion
1788 * This function does not block: it returns immediately if the lock cannot be acquired.
1789 * See lck_rw_lock_shared for more details.
1790 *
1791 * @param lock rw_lock to lock.
1792 *
1793 * @returns TRUE if the lock was successfully acquired, FALSE if it could not be taken.
1794 */
1795 boolean_t
1796 lck_rw_try_lock_shared(
1797 lck_rw_t *lock)
1798 {
1799 return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
1800 }
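
/*
 * Usage sketch (illustrative only): a non-blocking read attempt that falls
 * back to other work when a writer holds or wants the lock.  Names other
 * than the lck_rw_* KPI are hypothetical.
 *
 *	if (lck_rw_try_lock_shared(&stats_lock)) {
 *		snapshot_stats();
 *		lck_rw_unlock_shared(&stats_lock);
 *	} else {
 *		defer_snapshot();	// cannot block in this context
 *	}
 */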
1801
1802 __attribute__((always_inline))
1803 static boolean_t
1804 lck_rw_try_lock_exclusive_internal_inline(
1805 lck_rw_t *lock,
1806 void *caller)
1807 {
1808 #pragma unused(caller)
1809 uint32_t data, prev;
1810
1811 for (;;) {
1812 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1813 if (data & LCK_RW_INTERLOCK) {
1814 atomic_exchange_abort();
1815 lck_rw_interlock_spin(lock);
1816 continue;
1817 }
1818 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1819 atomic_exchange_abort();
1820 return FALSE;
1821 }
1822 data |= LCK_RW_WANT_EXCL;
1823 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1824 break;
1825 }
1826 cpu_pause();
1827 }
1828 thread_t thread = current_thread();
1829
1830 if (lock->lck_rw_can_sleep) {
1831 lck_rw_inc_thread_count(thread);
1832 } else if (get_preemption_level() == 0) {
1833 panic("Taking non-sleepable RW lock with preemption enabled");
1834 }
1835
1836 __assert_only thread_t owner = ordered_load_rw_owner(lock);
1837 assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1838
1839 ordered_store_rw_owner(lock, thread);
1840 #if CONFIG_DTRACE
1841 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1842 #endif /* CONFIG_DTRACE */
1843
1844 #ifdef DEBUG_RW
1845 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1846 #endif /* DEBUG_RW */
1847 return TRUE;
1848 }
1849
1850 __attribute__((noinline))
1851 static boolean_t
1852 lck_rw_try_lock_exclusive_internal(
1853 lck_rw_t *lock,
1854 void *caller)
1855 {
1856 return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
1857 }
1858
1859 /*!
1860 * @function lck_rw_try_lock_exclusive
1861 *
1862 * @abstract
1863 * Tries to lock a rw_lock in write (exclusive) mode.
1864 *
1865 * @discussion
1866 * This function does not block: it returns immediately if the lock cannot be acquired.
1867 * See lck_rw_lock_exclusive for more details.
1868 *
1869 * @param lock rw_lock to lock.
1870 *
1871 * @returns TRUE if the lock was successfully acquired, FALSE if it could not be taken.
1872 */
1873 boolean_t
1874 lck_rw_try_lock_exclusive(
1875 lck_rw_t *lock)
1876 {
1877 return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
1878 }
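
/*
 * Usage sketch (illustrative only): an opportunistic writer that must not
 * wait, retrying later if the lock is busy.  Only the lck_rw_* calls are
 * real KPI; the rest is hypothetical.
 *
 *	if (lck_rw_try_lock_exclusive(&table_lock)) {
 *		prune_table();
 *		lck_rw_unlock_exclusive(&table_lock);
 *	} else {
 *		mark_prune_pending();	// retry on the next pass
 *	}
 */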
1879
1880 /*
1881 * Very sad hack, but the codegen for lck_rw_try_lock
1882 * is very unhappy with the combination of __builtin_return_address()
1883 * and a noreturn function. For some reason it adds more frames
1884 * than it should. rdar://76570684
1885 */
1886 boolean_t
1887 _lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
1888 #pragma clang diagnostic push
1889 #pragma clang diagnostic ignored "-Wmissing-noreturn"
1890 __attribute__((noinline, weak))
1891 boolean_t
1892 _lck_rw_try_lock_type_panic(
1893 lck_rw_t *lck,
1894 lck_rw_type_t lck_rw_type)
1895 {
1896 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
1897 }
1898 #pragma clang diagnostic pop
1899
1900 /*!
1901 * @function lck_rw_try_lock
1902 *
1903 * @abstract
1904 * Tries to lock a rw_lock with the specified type.
1905 *
1906 * @discussion
1907 * This function does not wait/block: it returns immediately if the lock cannot be acquired.
1908 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
1909 *
1910 * @param lck rw_lock to lock.
1911 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1912 *
1913 * @returns TRUE if the lock was successfully acquired, FALSE if it could not be taken.
1914 */
1915 boolean_t
1916 lck_rw_try_lock(
1917 lck_rw_t *lck,
1918 lck_rw_type_t lck_rw_type)
1919 {
1920 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1921 return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
1922 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1923 return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
1924 }
1925 return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
1926 }
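
/*
 * Usage sketch (illustrative only): the try variant with a runtime-selected
 * mode; on success the same mode is used to unlock.
 *
 *	if (lck_rw_try_lock(&table_lock, mode)) {
 *		...
 *		lck_rw_unlock(&table_lock, mode);
 *	}
 */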
1927
1928 /*
1929 * Routine: lck_rw_done_gen
1930 *
1931 * prior_lock_state is the value in the 1st
1932 * word of the lock at the time of a successful
1933 * atomic compare and exchange with the new value...
1934 * it represents the state of the lock before we
1935 * decremented the rw_shared_count or cleared either
1936 * rw_want_upgrade or rw_want_write and
1937 * the lck_x_waiting bits... since the wrapper
1938 * routine has already changed the state atomically,
1939 * we just need to decide if we should
1940 * wake up anyone and what value to return... we do
1941 * this by examining the state of the lock before
1942 * we changed it
1943 */
1944 static lck_rw_type_t
1945 lck_rw_done_gen(
1946 lck_rw_t *lck,
1947 uint32_t prior_lock_state)
1948 {
1949 lck_rw_word_t fake_lck;
1950 lck_rw_type_t lock_type;
1951 thread_t thread;
1952 uint32_t rwlock_count;
1953
1954 /*
1955 * prior_lock state is a snapshot of the 1st word of the
1956 * lock in question... we'll fake up a pointer to it
1957 * and carefully not access anything beyond what's defined
1958 * in the first word of a lck_rw_t
1959 */
1960 fake_lck.data = prior_lock_state;
1961
1962 if (fake_lck.shared_count <= 1) {
1963 if (fake_lck.w_waiting) {
1964 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1965 }
1966
1967 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1968 thread_wakeup(LCK_RW_READER_EVENT(lck));
1969 }
1970 }
1971 if (fake_lck.shared_count) {
1972 lock_type = LCK_RW_TYPE_SHARED;
1973 } else {
1974 lock_type = LCK_RW_TYPE_EXCLUSIVE;
1975 }
1976
1977 /* Check if dropping the lock means that we need to unpromote */
1978 thread = current_thread();
1979 if (fake_lck.can_sleep) {
1980 rwlock_count = thread->rwlock_count--;
1981 } else {
1982 rwlock_count = UINT32_MAX;
1983 }
1984
1985 if (rwlock_count == 0) {
1986 panic("rw lock count underflow for thread %p", thread);
1987 }
1988
1989 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1990 /* sched_flags checked without lock, but will be rechecked while clearing */
1991 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
1992 }
1993 #if CONFIG_DTRACE
1994 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
1995 #endif
1996
1997 #ifdef DEBUG_RW
1998 remove_held_rwlock(lck, thread, lock_type);
1999 #endif /* DEBUG_RW */
2000 return lock_type;
2001 }
2002
2003 /*!
2004 * @function lck_rw_done
2005 *
2006 * @abstract
2007 * Force unlocks a rw_lock without consistency checks.
2008 *
2009 * @discussion
2010 * Do not use unless sure you can avoid consistency checks.
2011 *
2012 * @param lock rw_lock to unlock.
2013 */
2014 lck_rw_type_t
2015 lck_rw_done(
2016 lck_rw_t *lock)
2017 {
2018 uint32_t data, prev;
2019 boolean_t once = FALSE;
2020
2021 #ifdef DEBUG_RW
2022 /*
2023 * Best effort attempt to check that this thread
2024 * is holding the lock.
2025 */
2026 thread_t thread = current_thread();
2027 assert_held_rwlock(lock, thread, 0);
2028 #endif /* DEBUG_RW */
2029 for (;;) {
2030 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2031 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2032 atomic_exchange_abort();
2033 lck_rw_interlock_spin(lock);
2034 continue;
2035 }
2036 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2037 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2038 data -= LCK_RW_SHARED_READER;
2039 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
2040 goto check_waiters;
2041 }
2042 } else { /* if reader count == 0, must be exclusive lock */
2043 if (data & LCK_RW_WANT_UPGRADE) {
2044 data &= ~(LCK_RW_WANT_UPGRADE);
2045 } else {
2046 if (data & LCK_RW_WANT_EXCL) {
2047 data &= ~(LCK_RW_WANT_EXCL);
2048 } else { /* lock is not 'owned', panic */
2049 panic("Releasing non-exclusive RW lock without a reader refcount!");
2050 }
2051 }
2052 if (!once) {
2053 // Only check for holder and clear it once
2054 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2055 ordered_store_rw_owner(lock, THREAD_NULL);
2056 once = TRUE;
2057 }
2058 check_waiters:
2059 /*
2060 * test the original values to match what
2061 * lck_rw_done_gen is going to do to determine
2062 * which wakeups need to happen...
2063 *
2064 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2065 */
2066 if (prev & LCK_RW_W_WAITING) {
2067 data &= ~(LCK_RW_W_WAITING);
2068 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
2069 data &= ~(LCK_RW_R_WAITING);
2070 }
2071 } else {
2072 data &= ~(LCK_RW_R_WAITING);
2073 }
2074 }
2075 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
2076 break;
2077 }
2078 cpu_pause();
2079 }
2080 return lck_rw_done_gen(lock, prev);
2081 }
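
/*
 * Usage sketch (illustrative only): lck_rw_done() suits paths that can reach
 * the unlock point holding the lock in either mode (e.g. after a conditional
 * upgrade or downgrade); the return value reports which mode was dropped.
 * The helper name below is hypothetical.
 *
 *	lck_rw_type_t mode = lck_rw_done(&table_lock);
 *	if (mode == LCK_RW_TYPE_EXCLUSIVE) {
 *		note_writer_released();
 *	}
 */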
2082
2083 /*!
2084 * @function lck_rw_unlock_shared
2085 *
2086 * @abstract
2087 * Unlocks a rw_lock previously locked in shared mode.
2088 *
2089 * @discussion
2090 * The same thread that locked the lock needs to unlock it.
2091 *
2092 * @param lck rw_lock held in shared mode to unlock.
2093 */
2094 void
2095 lck_rw_unlock_shared(
2096 lck_rw_t *lck)
2097 {
2098 lck_rw_type_t ret;
2099
2100 assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
2101 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
2102 ret = lck_rw_done(lck);
2103
2104 if (ret != LCK_RW_TYPE_SHARED) {
2105 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
2106 }
2107 }
2108
2109 /*!
2110 * @function lck_rw_unlock_exclusive
2111 *
2112 * @abstract
2113 * Unlocks a rw_lock previously locked in exclusive mode.
2114 *
2115 * @discussion
2116 * The same thread that locked the lock needs to unlock it.
2117 *
2118 * @param lck rw_lock held in exclusive mode to unlock.
2119 */
2120 void
2121 lck_rw_unlock_exclusive(
2122 lck_rw_t *lck)
2123 {
2124 lck_rw_type_t ret;
2125
2126 assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
2127 ret = lck_rw_done(lck);
2128
2129 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
2130 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
2131 }
2132 }
2133
2134 /*!
2135 * @function lck_rw_unlock
2136 *
2137 * @abstract
2138 * Unlocks a rw_lock previously locked with lck_rw_type.
2139 *
2140 * @discussion
2141 * The lock must be unlocked by the same thread it was locked from.
2142 * The lock and unlock types have to match, unless an upgrade/downgrade was performed while
2143 * holding the lock.
2144 *
2145 * @param lck rw_lock to unlock.
2146 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2147 */
2148 void
2149 lck_rw_unlock(
2150 lck_rw_t *lck,
2151 lck_rw_type_t lck_rw_type)
2152 {
2153 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2154 lck_rw_unlock_shared(lck);
2155 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2156 lck_rw_unlock_exclusive(lck);
2157 } else {
2158 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
2159 }
2160 }
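
/*
 * Usage sketch (illustrative only): when the mode changed while the lock was
 * held (here a downgrade), the unlock type must describe the mode currently
 * held, not the one originally requested.
 *
 *	lck_rw_lock(&table_lock, LCK_RW_TYPE_EXCLUSIVE);
 *	...
 *	lck_rw_lock_exclusive_to_shared(&table_lock);
 *	...
 *	lck_rw_unlock(&table_lock, LCK_RW_TYPE_SHARED);
 */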
2161
2162 /*!
2163 * @function lck_rw_assert
2164 *
2165 * @abstract
2166 * Asserts the rw_lock is held.
2167 *
2168 * @discussion
2169 * Read-write locks do not have a concept of ownership when held in shared mode,
2170 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
2171 * However, if rw_lock_debug is on, a best-effort mechanism to track the owners is in place, and
2172 * this function can be more accurate.
2173 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD or
2174 * LCK_RW_ASSERT_NOTHELD.
2175 *
2176 * @param lck rw_lock to check.
2177 * @param type assert type
2178 */
2179 void
2180 lck_rw_assert(
2181 lck_rw_t *lck,
2182 unsigned int type)
2183 {
2184 #if DEBUG_RW
2185 thread_t thread = current_thread();
2186 #endif /* DEBUG_RW */
2187
2188 switch (type) {
2189 case LCK_RW_ASSERT_SHARED:
2190 if ((lck->lck_rw_shared_count != 0) &&
2191 (lck->lck_rw_owner == THREAD_NULL)) {
2192 #if DEBUG_RW
2193 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2194 #endif /* DEBUG_RW */
2195 return;
2196 }
2197 break;
2198 case LCK_RW_ASSERT_EXCLUSIVE:
2199 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2200 (lck->lck_rw_shared_count == 0) &&
2201 (lck->lck_rw_owner == current_thread())) {
2202 #if DEBUG_RW
2203 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2204 #endif /* DEBUG_RW */
2205 return;
2206 }
2207 break;
2208 case LCK_RW_ASSERT_HELD:
2209 if (lck->lck_rw_shared_count != 0) {
2210 #if DEBUG_RW
2211 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2212 #endif /* DEBUG_RW */
2213 return; // Held shared
2214 }
2215 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2216 (lck->lck_rw_owner == current_thread())) {
2217 #if DEBUG_RW
2218 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2219 #endif /* DEBUG_RW */
2220 return; // Held exclusive
2221 }
2222 break;
2223 case LCK_RW_ASSERT_NOTHELD:
2224 if ((lck->lck_rw_shared_count == 0) &&
2225 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2226 (lck->lck_rw_owner == THREAD_NULL)) {
2227 #ifdef DEBUG_RW
2228 assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2229 #endif /* DEBUG_RW */
2230 return;
2231 }
2232 break;
2233 default:
2234 break;
2235 }
2236 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2237 }
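
/*
 * Usage sketch (illustrative only): a helper that documents and enforces its
 * locking contract with lck_rw_assert().  The helper and its types are
 * hypothetical.
 *
 *	static void
 *	table_remove_entry_locked(struct table *t, struct entry *e)
 *	{
 *		lck_rw_assert(&t->lock, LCK_RW_ASSERT_EXCLUSIVE);
 *		...	// safe to mutate: the caller holds the lock exclusively
 *	}
 */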
2238
2239 /*!
2240 * @function kdp_lck_rw_lock_is_acquired_exclusive
2241 *
2242 * @abstract
2243 * Checks if a rw_lock is held exclusively.
2244 *
2245 * @discussion
2246 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2247 *
2248 * @param lck lock to check
2249 *
2250 * @returns TRUE if the lock is held exclusively
2251 */
2252 boolean_t
2253 kdp_lck_rw_lock_is_acquired_exclusive(
2254 lck_rw_t *lck)
2255 {
2256 if (not_in_kdp) {
2257 panic("panic: rw lock exclusive check done outside of kernel debugger");
2258 }
2259 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2260 }
2261
2262 void
2263 kdp_rwlck_find_owner(
2264 __unused struct waitq *waitq,
2265 event64_t event,
2266 thread_waitinfo_t *waitinfo)
2267 {
2268 lck_rw_t *rwlck = NULL;
2269 switch (waitinfo->wait_type) {
2270 case kThreadWaitKernelRWLockRead:
2271 rwlck = READ_EVENT_TO_RWLOCK(event);
2272 break;
2273 case kThreadWaitKernelRWLockWrite:
2274 case kThreadWaitKernelRWLockUpgrade:
2275 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2276 break;
2277 default:
2278 panic("%s was called with an invalid blocking type", __FUNCTION__);
2279 break;
2280 }
2281 if (rwlck->lck_rw_owner) {
2282 thread_require(rwlck->lck_rw_owner);
2283 }
2284 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2285 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
2286 }
2287
2288 /*!
2289 * @function lck_rw_lock_yield_shared
2290 *
2291 * @abstract
2292 * Yields a rw_lock held in shared mode.
2293 *
2294 * @discussion
2295 * This function can block.
2296 * Yields the lock in case there are writers waiting.
2297 * The yield will unlock, block, and re-lock the lock in shared mode.
2298 *
2299 * @param lck rw_lock already held in shared mode to yield.
2300 * @param force_yield if set to true it will always yield irrespective of the lock status
2301 *
2302 * @returns TRUE if the lock was yielded, FALSE otherwise
2303 */
2304 boolean_t
2305 lck_rw_lock_yield_shared(
2306 lck_rw_t *lck,
2307 boolean_t force_yield)
2308 {
2309 lck_rw_word_t word;
2310
2311 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2312
2313 word.data = ordered_load_rw(lck);
2314 if (word.want_excl || word.want_upgrade || force_yield) {
2315 lck_rw_unlock_shared(lck);
2316 mutex_pause(2);
2317 lck_rw_lock_shared(lck);
2318 return TRUE;
2319 }
2320
2321 return FALSE;
2322 }
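
/*
 * Usage sketch (illustrative only): a long shared-mode scan that periodically
 * lets waiting writers in.  When the yield returns TRUE the lock was dropped
 * and re-taken, so state derived from the protected data must be revalidated.
 * The iteration helpers are hypothetical.
 *
 *	lck_rw_lock_shared(&table_lock);
 *	e = table_first();
 *	while (e != NULL) {
 *		if (lck_rw_lock_yield_shared(&table_lock, FALSE)) {
 *			e = table_first();	// lock was dropped: restart the scan
 *			continue;
 *		}
 *		examine(e);
 *		e = table_next(e);
 *	}
 *	lck_rw_unlock_shared(&table_lock);
 */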
2323
2324 /*!
2325 * @function lck_rw_sleep
2326 *
2327 * @abstract
2328 * Assert_wait on an event while holding the rw_lock.
2329 *
2330 * @discussion
2331 * The flags determine how to re-acquire the lock upon wake up
2332 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2333 * and whether the priority needs to be kept boosted until the lock is
2334 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2335 *
2336 * @param lck rw_lock to use to synch the assert_wait.
2337 * @param lck_sleep_action flags.
2338 * @param event event to assert_wait on.
2339 * @param interruptible wait type.
2340 */
2341 wait_result_t
2342 lck_rw_sleep(
2343 lck_rw_t *lck,
2344 lck_sleep_action_t lck_sleep_action,
2345 event_t event,
2346 wait_interrupt_t interruptible)
2347 {
2348 wait_result_t res;
2349 lck_rw_type_t lck_rw_type;
2350 thread_pri_floor_t token;
2351
2352 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2353 panic("Invalid lock sleep action %x", lck_sleep_action);
2354 }
2355
2356 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2357 /*
2358 * Although we are dropping the RW lock, the intent in most cases
2359 * is that this thread remains as an observer, since it may hold
2360 * some secondary resource, but must yield to avoid deadlock. In
2361 * this situation, make sure that the thread is boosted to the
2362 * ceiling while blocked, so that it can re-acquire the
2363 * RW lock at that priority.
2364 */
2365 token = thread_priority_floor_start();
2366 }
2367
2368 res = assert_wait(event, interruptible);
2369 if (res == THREAD_WAITING) {
2370 lck_rw_type = lck_rw_done(lck);
2371 res = thread_block(THREAD_CONTINUE_NULL);
2372 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2373 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2374 lck_rw_lock(lck, lck_rw_type);
2375 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2376 lck_rw_lock_exclusive(lck);
2377 } else {
2378 lck_rw_lock_shared(lck);
2379 }
2380 }
2381 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2382 (void)lck_rw_done(lck);
2383 }
2384
2385 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2386 thread_priority_floor_end(&token);
2387 }
2388
2389 return res;
2390 }
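
/*
 * Usage sketch (illustrative only): the classic wait loop, checking the
 * predicate under the lock and re-taking it in the same mode around each
 * sleep.  The queue, its event field and the helpers are hypothetical.
 *
 *	lck_rw_lock_exclusive(&q->lock);
 *	while (queue_is_empty(q)) {
 *		(void) lck_rw_sleep(&q->lock, LCK_SLEEP_EXCLUSIVE,
 *		    (event_t)&q->wait_event, THREAD_UNINT);
 *	}
 *	dequeue_one(q);
 *	lck_rw_unlock_exclusive(&q->lock);
 */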
2391
2392 /*!
2393 * @function lck_rw_sleep_deadline
2394 *
2395 * @abstract
2396 * Assert_wait_deadline on an event while holding the rw_lock.
2397 *
2398 * @discussion
2399 * The flags determine how to re-acquire the lock upon wake up
2400 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2401 * and whether the priority needs to be kept boosted until the lock is
2402 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2403 *
2404 * @param lck rw_lock to use to synch the assert_wait.
2405 * @param lck_sleep_action flags.
2406 * @param event event to assert_wait on.
2407 * @param interruptible wait type.
2408 * @param deadline absolute time at which the wait times out if the event has not been posted
2409 */
2410 wait_result_t
2411 lck_rw_sleep_deadline(
2412 lck_rw_t *lck,
2413 lck_sleep_action_t lck_sleep_action,
2414 event_t event,
2415 wait_interrupt_t interruptible,
2416 uint64_t deadline)
2417 {
2418 wait_result_t res;
2419 lck_rw_type_t lck_rw_type;
2420 thread_pri_floor_t token;
2421
2422 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2423 panic("Invalid lock sleep action %x", lck_sleep_action);
2424 }
2425
2426 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2427 token = thread_priority_floor_start();
2428 }
2429
2430 res = assert_wait_deadline(event, interruptible, deadline);
2431 if (res == THREAD_WAITING) {
2432 lck_rw_type = lck_rw_done(lck);
2433 res = thread_block(THREAD_CONTINUE_NULL);
2434 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2435 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2436 lck_rw_lock(lck, lck_rw_type);
2437 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2438 lck_rw_lock_exclusive(lck);
2439 } else {
2440 lck_rw_lock_shared(lck);
2441 }
2442 }
2443 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2444 (void)lck_rw_done(lck);
2445 }
2446
2447 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2448 thread_priority_floor_end(&token);
2449 }
2450
2451 return res;
2452 }
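
/*
 * Usage sketch (illustrative only): the same pattern with a timeout, where
 * clock_interval_to_deadline() converts a relative interval into the absolute
 * deadline the call expects.  Names other than the kernel KPI are hypothetical.
 *
 *	uint64_t deadline;
 *	clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);
 *	wait_result_t wr = lck_rw_sleep_deadline(&q->lock, LCK_SLEEP_EXCLUSIVE,
 *	    (event_t)&q->wait_event, THREAD_UNINT, deadline);
 *	if (wr == THREAD_TIMED_OUT) {
 *		...	// still holding q->lock exclusively; handle the timeout
 *	}
 */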
2453
2454 /*
2455 * Reader-writer lock promotion
2456 *
2457 * We support a limited form of reader-writer
2458 * lock promotion whose effects are:
2459 *
2460 * * Qualifying threads have decay disabled
2461 * * Scheduler priority is reset to a floor of
2462 * their statically assigned priority
2463 * or MINPRI_RWLOCK
2464 *
2465 * The rationale is that lck_rw_ts do not have
2466 * a single owner, so we cannot apply a directed
2467 * priority boost from all waiting threads
2468 * to all holding threads without maintaining
2469 * lists of all shared owners and all waiting
2470 * threads for every lock.
2471 *
2472 * Instead (and to preserve the uncontended fast-
2473 * path), acquiring (or attempting to acquire)
2474 * a RW lock in shared or exclusive mode increments
2475 * a per-thread counter. Only if that thread stops
2476 * making forward progress (for instance blocking
2477 * on a mutex, or being preempted) do we consult
2478 * the counter and apply the priority floor.
2479 * When the thread becomes runnable again (or in
2480 * the case of preemption it never stopped being
2481 * runnable), it has the priority boost and should
2482 * be in a good position to run on the CPU and
2483 * release all RW locks (at which point the priority
2484 * boost is cleared).
2485 *
2486 * Care must be taken to ensure that priority
2487 * boosts are not retained indefinitely, since unlike
2488 * mutex priority boosts (where the boost is tied
2489 * to the mutex lifecycle), the boost is tied
2490 * to the thread and independent of any particular
2491 * lck_rw_t. Assertions are in place on return
2492 * to userspace so that the boost is not held
2493 * indefinitely.
2494 *
2495 * The routines that increment/decrement the
2496 * per-thread counter should err on the side of
2497 * incrementing any time a preemption is possible
2498 * and the lock would be visible to the rest of the
2499 * system as held (so it should be incremented before
2500 * interlocks are dropped/preemption is enabled, or
2501 * before a CAS is executed to acquire the lock).
2502 *
2503 */
2504
2505 /*!
2506 * @function lck_rw_clear_promotion
2507 *
2508 * @abstract
2509 * Undo priority promotions when the last rw_lock
2510 * is released by a thread (if a promotion was active).
2511 *
2512 * @param thread thread to demote.
2513 * @param trace_obj object reason for the demotion.
2514 */
2515 void
2516 lck_rw_clear_promotion(
2517 thread_t thread,
2518 uintptr_t trace_obj)
2519 {
2520 assert(thread->rwlock_count == 0);
2521
2522 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
2523 spl_t s = splsched();
2524 thread_lock(thread);
2525
2526 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
2527 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
2528 }
2529
2530 thread_unlock(thread);
2531 splx(s);
2532 }
2533
2534 /*!
2535 * @function lck_rw_set_promotion_locked
2536 *
2537 * @abstract
2538 * Callout from context switch if the thread goes
2539 * off core with a positive rwlock_count.
2540 *
2541 * @discussion
2542 * Called at splsched with the thread locked.
2543 *
2544 * @param thread thread to promote.
2545 */
2546 void
2547 lck_rw_set_promotion_locked(thread_t thread)
2548 {
2549 if (LcksOpts & disLkRWPrio) {
2550 return;
2551 }
2552
2553 assert(thread->rwlock_count > 0);
2554
2555 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2556 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
2557 }
2558 }
2559
2560 #if __x86_64__
2561 void lck_rw_clear_promotions_x86(thread_t thread);
2562 /*
2563 * On return to userspace, this routine is called from assembly
2564 * if the rwlock_count is somehow imbalanced
2565 */
2566 #if MACH_LDEBUG
2567 __dead2
2568 #endif /* MACH_LDEBUG */
2569 void
2570 lck_rw_clear_promotions_x86(thread_t thread)
2571 {
2572 #if MACH_LDEBUG
2573 /* It's fatal to leave a RW lock locked and return to userspace */
2574 panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread);
2575 #else
2576 /* Paper over the issue */
2577 thread->rwlock_count = 0;
2578 lck_rw_clear_promotion(thread, 0);
2579 #endif /* MACH_LDEBUG */
2580 }
2581 #endif /* __x86_64__ */
2582