1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
44 static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
45 struct thread_qos_override, KT_DEFAULT);
46
47 #ifdef MACH_BSD
48 extern int proc_selfpid(void);
49 extern char * proc_name_address(void *p);
50 extern void rethrottle_thread(void * uthread);
51 #endif /* MACH_BSD */
52
53 #define QOS_EXTRACT(q) ((q) & 0xff)
54
55 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
56 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
57 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
58 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
59
60 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
61 QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
62
63 static void
64 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
65
66 /*
67 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
68 * to threads that don't have a QoS class set.
69 */
70 const qos_policy_params_t thread_qos_policy_params = {
71 /*
72 * This table defines the starting base priority of the thread,
73 * which will be modified by the thread importance and the task max priority
74 * before being applied.
75 */
76 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
77 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
78 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
79 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
80 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
81 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
82 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
83
84 /*
85 * This table defines the highest IO priority that a thread marked with this
86 * QoS class can have.
87 */
88 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
90 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
91 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
92 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
93 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
94 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
95
96 /*
97 * This table defines the highest QoS level that
98 * a thread marked with this QoS class can have.
99 */
100
101 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
102 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
103 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
104 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
105 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
106 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
107 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
108
109 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
110 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
111 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
112 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
113 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
114 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
115 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
116 };
117
118 static void
119 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
120
121 static int
122 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
123
124 static void
125 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
126
127 static void
128 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130 static void
131 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
132
133 static void
134 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
135
136 static int
137 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
138
139 static int
140 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
141
142 static void
143 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
144
145 static void
146 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
147
148 boolean_t
thread_has_qos_policy(thread_t thread)149 thread_has_qos_policy(thread_t thread)
150 {
151 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
152 }
153
154
155 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)156 thread_remove_qos_policy_locked(thread_t thread,
157 task_pend_token_t pend_token)
158 {
159 __unused int prev_qos = thread->requested_policy.thrp_qos;
160
161 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
162
163 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
164 THREAD_QOS_UNSPECIFIED, 0, pend_token);
165 }
166
167 kern_return_t
thread_remove_qos_policy(thread_t thread)168 thread_remove_qos_policy(thread_t thread)
169 {
170 struct task_pend_token pend_token = {};
171
172 thread_mtx_lock(thread);
173 if (!thread->active) {
174 thread_mtx_unlock(thread);
175 return KERN_TERMINATED;
176 }
177
178 thread_remove_qos_policy_locked(thread, &pend_token);
179
180 thread_mtx_unlock(thread);
181
182 thread_policy_update_complete_unlocked(thread, &pend_token);
183
184 return KERN_SUCCESS;
185 }
186
187
188 boolean_t
thread_is_static_param(thread_t thread)189 thread_is_static_param(thread_t thread)
190 {
191 if (thread->static_param) {
192 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
193 return TRUE;
194 }
195 return FALSE;
196 }
197
198 /*
199 * Relative priorities can range between 0REL and -15REL. These
200 * map to QoS-specific ranges, to create non-overlapping priority
201 * ranges.
202 */
203 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)204 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
205 {
206 int next_lower_qos;
207
208 /* Fast path, since no validation or scaling is needed */
209 if (qos_relprio == 0) {
210 return 0;
211 }
212
213 switch (qos) {
214 case THREAD_QOS_USER_INTERACTIVE:
215 next_lower_qos = THREAD_QOS_USER_INITIATED;
216 break;
217 case THREAD_QOS_USER_INITIATED:
218 next_lower_qos = THREAD_QOS_LEGACY;
219 break;
220 case THREAD_QOS_LEGACY:
221 next_lower_qos = THREAD_QOS_UTILITY;
222 break;
223 case THREAD_QOS_UTILITY:
224 next_lower_qos = THREAD_QOS_BACKGROUND;
225 break;
226 case THREAD_QOS_MAINTENANCE:
227 case THREAD_QOS_BACKGROUND:
228 next_lower_qos = 0;
229 break;
230 default:
231 panic("Unrecognized QoS %d", qos);
232 return 0;
233 }
234
235 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
236 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
237
238 /*
239 * We now have the valid range that the scaled relative priority can map to. Note
240 * that the lower bound is exclusive, but the upper bound is inclusive. If the
241 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
242 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
243 * remainder.
244 */
245 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
246
247 return scaled_relprio;
248 }
249
250 /*
251 * flag set by -qos-policy-allow boot-arg to allow
252 * testing thread qos policy from userspace
253 */
254 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
255
/*
 * MIG entry point for setting a thread policy flavor.
 *
 * Unless the -qos-policy-allow boot-arg is set, this path rejects
 * static-param threads, rejects THREAD_QOS_POLICY outright (QoS is set via
 * other kernel interfaces), and only admits the WITH_PRIORITY time
 * constraint flavor at exactly BASEPRI_RTQUEUES.  If the thread has a QoS
 * requested, it is removed first and restored if the set fails.
 */
kern_return_t
thread_policy_set(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			/* Without the boot-arg, only the base RT priority is allowed. */
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	/*
	 * NOTE(review): the comment below appears stale — no KERN_QOS_REMOVED
	 * is returned here; on failure the saved QoS is simply reinstated.
	 */
	/* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
316
317 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
318 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
319 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
320 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
321
/*
 * Apply a thread policy flavor to the thread.
 *
 * Takes the thread mutex for the duration (except the affinity case, which
 * must drop it before taking the task lock), stages deferred side effects
 * into a pend_token, and flushes them via
 * thread_policy_update_complete_unlocked() after unlocking.
 *
 * Returns KERN_TERMINATED for inactive threads, KERN_INVALID_ARGUMENT for
 * short counts / out-of-range values, otherwise the per-flavor result.
 */
kern_return_t
thread_policy_set_internal(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* Zero-length info defaults to timeshare scheduling. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/* Mode changes require SFI re-evaluation after unlock. */
		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/*
		 * The WITH_PRIORITY struct is a prefix-compatible superset of the
		 * plain one (see the static_asserts above), so one pointer type
		 * serves both flavors; only the count requirement differs.
		 */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* Computation must fit inside the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * Clamp computation up to at least half the constraint (capped at
		 * the max RT quantum).  Note this mutates the caller-provided info.
		 */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;
		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Offset above BASEPRI_RTQUEUES selects the RT sub-queue. */
			thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
		} else {
			thread->realtime.priority_offset = 0;
			/* Or check for override from allowed thread group? */
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance biases the base priority during recomputation. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only threads in the current task may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-set counts as internal; setting another thread is external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance is a relative priority: 0 down to MIN_TIER_IMPORTANCE. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* A relative priority is meaningless without a QoS tier to anchor it. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Stored negated: thrp_qos_relprio keeps the magnitude. */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Flush deferred side effects (SFI, sockets, etc.) outside the mutex. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
593
594 /*
595 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
596 * Both result in FIXED mode scheduling.
597 */
598 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)599 convert_policy_to_sched_mode(integer_t policy)
600 {
601 switch (policy) {
602 case POLICY_TIMESHARE:
603 return TH_MODE_TIMESHARE;
604 case POLICY_RR:
605 case POLICY_FIFO:
606 return TH_MODE_FIXED;
607 default:
608 panic("unexpected sched policy: %d", policy);
609 return TH_MODE_NONE;
610 }
611 }
612
613 /*
614 * Called either with the thread mutex locked
615 * or from the pthread kext in a 'safe place'.
616 */
617 static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,sched_mode_t mode,integer_t priority,task_pend_token_t pend_token)618 thread_set_mode_and_absolute_pri_internal(thread_t thread,
619 sched_mode_t mode,
620 integer_t priority,
621 task_pend_token_t pend_token)
622 {
623 kern_return_t kr = KERN_SUCCESS;
624
625 spl_t s = splsched();
626 thread_lock(thread);
627
628 /* This path isn't allowed to change a thread out of realtime. */
629 if ((thread->sched_mode == TH_MODE_REALTIME) ||
630 (thread->saved_mode == TH_MODE_REALTIME)) {
631 kr = KERN_FAILURE;
632 goto unlock;
633 }
634
635 if (thread->policy_reset) {
636 kr = KERN_SUCCESS;
637 goto unlock;
638 }
639
640 sched_mode_t old_mode = thread->sched_mode;
641
642 /*
643 * Reverse engineer and apply the correct importance value
644 * from the requested absolute priority value.
645 *
646 * TODO: Store the absolute priority value instead
647 */
648
649 if (priority >= thread->max_priority) {
650 priority = thread->max_priority - thread->task_priority;
651 } else if (priority >= MINPRI_KERNEL) {
652 priority -= MINPRI_KERNEL;
653 } else if (priority >= MINPRI_RESERVED) {
654 priority -= MINPRI_RESERVED;
655 } else {
656 priority -= BASEPRI_DEFAULT;
657 }
658
659 priority += thread->task_priority;
660
661 if (priority > thread->max_priority) {
662 priority = thread->max_priority;
663 } else if (priority < MINPRI) {
664 priority = MINPRI;
665 }
666
667 thread->importance = priority - thread->task_priority;
668
669 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
670
671 if (mode != old_mode) {
672 pend_token->tpt_update_thread_sfi = 1;
673 }
674
675 unlock:
676 thread_unlock(thread);
677 splx(s);
678
679 return kr;
680 }
681
682 void
thread_freeze_base_pri(thread_t thread)683 thread_freeze_base_pri(thread_t thread)
684 {
685 assert(thread == current_thread());
686
687 spl_t s = splsched();
688 thread_lock(thread);
689
690 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
691 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
692
693 thread_unlock(thread);
694 splx(s);
695 }
696
697 bool
thread_unfreeze_base_pri(thread_t thread)698 thread_unfreeze_base_pri(thread_t thread)
699 {
700 assert(thread == current_thread());
701 integer_t base_pri;
702 ast_t ast = 0;
703
704 spl_t s = splsched();
705 thread_lock(thread);
706
707 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
708 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
709
710 base_pri = thread->req_base_pri;
711 if (base_pri != thread->base_pri) {
712 /*
713 * This function returns "true" if the base pri change
714 * is the most likely cause for the preemption.
715 */
716 sched_set_thread_base_priority(thread, base_pri);
717 ast = ast_peek(AST_PREEMPT);
718 }
719
720 thread_unlock(thread);
721 splx(s);
722
723 return ast != 0;
724 }
725
726 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)727 thread_workq_pri_for_qos(thread_qos_t qos)
728 {
729 assert(qos < THREAD_QOS_LAST);
730 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
731 }
732
733 thread_qos_t
thread_workq_qos_for_pri(int priority)734 thread_workq_qos_for_pri(int priority)
735 {
736 thread_qos_t qos;
737 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
738 // indicate that workq should map >UI threads to workq's
739 // internal notation for above-UI work.
740 return THREAD_QOS_UNSPECIFIED;
741 }
742 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
743 // map a given priority up to the next nearest qos band.
744 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
745 return qos;
746 }
747 }
748 return THREAD_QOS_MAINTENANCE;
749 }
750
751 /*
752 * private interface for pthread workqueues
753 *
754 * Set scheduling policy & absolute priority for thread
755 * May be called with spinlocks held
756 * Thread mutex lock is not held
757 */
758 void
thread_reset_workq_qos(thread_t thread,uint32_t qos)759 thread_reset_workq_qos(thread_t thread, uint32_t qos)
760 {
761 struct task_pend_token pend_token = {};
762
763 assert(qos < THREAD_QOS_LAST);
764
765 spl_t s = splsched();
766 thread_lock(thread);
767
768 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
769 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
770 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
771 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
772 &pend_token);
773
774 assert(pend_token.tpt_update_sockets == 0);
775
776 thread_unlock(thread);
777 splx(s);
778
779 thread_policy_update_complete_unlocked(thread, &pend_token);
780 }
781
782 /*
783 * private interface for pthread workqueues
784 *
785 * Set scheduling policy & absolute priority for thread
786 * May be called with spinlocks held
787 * Thread mutex lock is held
788 */
789 void
thread_set_workq_override(thread_t thread,uint32_t qos)790 thread_set_workq_override(thread_t thread, uint32_t qos)
791 {
792 struct task_pend_token pend_token = {};
793
794 assert(qos < THREAD_QOS_LAST);
795
796 spl_t s = splsched();
797 thread_lock(thread);
798
799 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
800 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
801
802 assert(pend_token.tpt_update_sockets == 0);
803
804 thread_unlock(thread);
805 splx(s);
806
807 thread_policy_update_complete_unlocked(thread, &pend_token);
808 }
809
810 /*
811 * private interface for pthread workqueues
812 *
813 * Set scheduling policy & absolute priority for thread
814 * May be called with spinlocks held
815 * Thread mutex lock is not held
816 */
817 void
thread_set_workq_pri(thread_t thread,thread_qos_t qos,integer_t priority,integer_t policy)818 thread_set_workq_pri(thread_t thread,
819 thread_qos_t qos,
820 integer_t priority,
821 integer_t policy)
822 {
823 struct task_pend_token pend_token = {};
824 sched_mode_t mode = convert_policy_to_sched_mode(policy);
825
826 assert(qos < THREAD_QOS_LAST);
827 assert(thread->static_param);
828
829 if (!thread->static_param || !thread->active) {
830 return;
831 }
832
833 spl_t s = splsched();
834 thread_lock(thread);
835
836 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
837 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
838 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
839 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
840 0, &pend_token);
841
842 thread_unlock(thread);
843 splx(s);
844
845 /* Concern: this doesn't hold the mutex... */
846
847 __assert_only kern_return_t kr;
848 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
849 &pend_token);
850 assert(kr == KERN_SUCCESS);
851
852 if (pend_token.tpt_update_thread_sfi) {
853 sfi_reevaluate(thread);
854 }
855 }
856
857 /*
858 * thread_set_mode_and_absolute_pri:
859 *
860 * Set scheduling policy & absolute priority for thread, for deprecated
861 * thread_set_policy and thread_policy interfaces.
862 *
863 * Called with nothing locked.
864 */
865 kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,integer_t policy,integer_t priority)866 thread_set_mode_and_absolute_pri(thread_t thread,
867 integer_t policy,
868 integer_t priority)
869 {
870 kern_return_t kr = KERN_SUCCESS;
871 struct task_pend_token pend_token = {};
872
873 sched_mode_t mode = convert_policy_to_sched_mode(policy);
874
875 thread_mtx_lock(thread);
876
877 if (!thread->active) {
878 kr = KERN_TERMINATED;
879 goto unlock;
880 }
881
882 if (thread_is_static_param(thread)) {
883 kr = KERN_POLICY_STATIC;
884 goto unlock;
885 }
886
887 /* Setting legacy policies on threads kills the current QoS */
888 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
889 thread_remove_qos_policy_locked(thread, &pend_token);
890 }
891
892 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
893
894 unlock:
895 thread_mtx_unlock(thread);
896
897 thread_policy_update_complete_unlocked(thread, &pend_token);
898
899 return kr;
900 }
901
902 /*
903 * Set the thread's requested mode and recompute priority
904 * Called with thread mutex and thread locked
905 *
906 * TODO: Mitigate potential problems caused by moving thread to end of runq
907 * whenever its priority is recomputed
908 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
909 */
910 static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread,sched_mode_t mode)911 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
912 {
913 if (thread->policy_reset) {
914 return;
915 }
916
917 boolean_t removed = thread_run_queue_remove(thread);
918
919 /*
920 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
921 * That way there's zero confusion over which the user wants
922 * and which the kernel wants.
923 */
924 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
925 thread->saved_mode = mode;
926 } else {
927 sched_set_thread_mode(thread, mode);
928 }
929
930 thread_recompute_priority(thread);
931
932 if (removed) {
933 thread_run_queue_reinsert(thread, SCHED_TAILQ);
934 }
935 }
936
/* called at splsched with thread lock locked */
/*
 * Charge the thread's CPU time accumulated since the last call to the
 * task-level per-QoS counters, bucketed by both the effective and the
 * requested QoS class.
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate as the distance between
	 * last context switch (embedded) or last user/kernel boundary transition (desktop)
	 * because user_timer and system_timer are only updated then.
	 *
	 * TODO: Consider running a timer_update operation here to update it first.
	 * Maybe doable with interrupts disabled from current thread.
	 * If the thread is on a different core, may not be easy to get right.
	 *
	 * TODO: There should be a function for this in timer.c
	 */

	timer_sum = timer_grab(&thread->user_timer);
	timer_sum += timer_grab(&thread->system_timer);
	/* Delta since the last snapshot stored in vtimer_qos_save. */
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
994
995 /*
996 * called with no thread locks held
997 * may hold task lock
998 */
999 void
thread_update_qos_cpu_time(thread_t thread)1000 thread_update_qos_cpu_time(thread_t thread)
1001 {
1002 thread_mtx_lock(thread);
1003
1004 spl_t s = splsched();
1005 thread_lock(thread);
1006
1007 thread_update_qos_cpu_time_locked(thread);
1008
1009 thread_unlock(thread);
1010 splx(s);
1011
1012 thread_mtx_unlock(thread);
1013 }
1014
1015 /*
1016 * Calculate base priority from thread attributes, and set it on the thread
1017 *
1018 * Called with thread_lock and thread mutex held.
1019 */
1020 extern boolean_t vps_dynamic_priority_enabled;
1021
1022 void
thread_recompute_priority(thread_t thread)1023 thread_recompute_priority(
1024 thread_t thread)
1025 {
1026 integer_t priority;
1027
1028 if (thread->policy_reset) {
1029 return;
1030 }
1031
1032 if (thread->sched_mode == TH_MODE_REALTIME) {
1033 uint8_t i = thread->realtime.priority_offset;
1034 assert((i >= 0) && (i < NRTQS));
1035 priority = BASEPRI_RTQUEUES + i;
1036 sched_set_thread_base_priority(thread, priority);
1037 if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1038 /* Make sure the thread has a valid deadline */
1039 uint64_t ctime = mach_absolute_time();
1040 thread->realtime.deadline = thread->realtime.constraint + ctime;
1041 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1042 (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1043 }
1044 return;
1045 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1046 int qos = thread->effective_policy.thep_qos;
1047 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1048 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1049 int qos_scaled_relprio;
1050
1051 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1052 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1053
1054 priority = thread_qos_policy_params.qos_pri[qos];
1055 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1056
1057 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1058 /* Bump priority 46 to 47 when in a frontmost app */
1059 qos_scaled_relprio += 1;
1060 }
1061
1062 /* TODO: factor in renice priority here? */
1063
1064 priority += qos_scaled_relprio;
1065 } else {
1066 if (thread->importance > MAXPRI) {
1067 priority = MAXPRI;
1068 } else if (thread->importance < -MAXPRI) {
1069 priority = -MAXPRI;
1070 } else {
1071 priority = thread->importance;
1072 }
1073
1074 priority += thread->task_priority;
1075 }
1076
1077 priority = MAX(priority, thread->user_promotion_basepri);
1078
1079 /*
1080 * Clamp priority back into the allowed range for this task.
1081 * The initial priority value could be out of this range due to:
1082 * Task clamped to BG or Utility (max-pri is 4, or 20)
1083 * Task is user task (max-pri is 63)
1084 * Task is kernel task (max-pri is 95)
1085 * Note that thread->importance is user-settable to any integer
1086 * via THREAD_PRECEDENCE_POLICY.
1087 */
1088 if (priority > thread->max_priority) {
1089 if (thread->effective_policy.thep_promote_above_task) {
1090 priority = MAX(thread->max_priority, thread->user_promotion_basepri);
1091 } else {
1092 priority = thread->max_priority;
1093 }
1094 } else if (priority < MINPRI) {
1095 priority = MINPRI;
1096 }
1097
1098 if (thread->saved_mode == TH_MODE_REALTIME &&
1099 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1100 priority = DEPRESSPRI;
1101 }
1102
1103 if (thread->effective_policy.thep_terminated == TRUE) {
1104 /*
1105 * We temporarily want to override the expected priority to
1106 * ensure that the thread exits in a timely manner.
1107 * Note that this is allowed to exceed thread->max_priority
1108 * so that the thread is no longer clamped to background
1109 * during the final exit phase.
1110 */
1111 if (priority < thread->task_priority) {
1112 priority = thread->task_priority;
1113 }
1114 if (priority < BASEPRI_DEFAULT) {
1115 priority = BASEPRI_DEFAULT;
1116 }
1117 }
1118
1119 #if !defined(XNU_TARGET_OS_OSX)
1120 /* No one can have a base priority less than MAXPRI_THROTTLE */
1121 if (priority < MAXPRI_THROTTLE) {
1122 priority = MAXPRI_THROTTLE;
1123 }
1124 #endif /* !defined(XNU_TARGET_OS_OSX) */
1125
1126 sched_set_thread_base_priority(thread, priority);
1127 }
1128
/*
 * Push a new task priority / max priority pair onto a thread and
 * re-run the thread policy update.
 *
 * Called with the task lock held, but not the thread mutex or spinlock
 */
void
thread_policy_update_tasklocked(
	thread_t thread,
	integer_t priority,
	integer_t max_priority,
	task_pend_token_t pend_token)
{
	thread_mtx_lock(thread);

	/* Nothing to do for a terminating or reset thread */
	if (!thread->active || thread->policy_reset) {
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * NOTE(review): old_max_priority is read below in the throttle
	 * demote/undemote checks, so the __unused annotation looks
	 * unnecessary — confirm whether some configuration compiles
	 * those uses out.
	 */
	__unused
	integer_t old_max_priority = thread->max_priority;

	/* Both values are stored in int16_t fields; assert they fit */
	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1176
1177 /*
1178 * Reset thread to default state in preparation for termination
1179 * Called with thread mutex locked
1180 *
1181 * Always called on current thread, so we don't need a run queue remove
1182 */
1183 void
thread_policy_reset(thread_t thread)1184 thread_policy_reset(
1185 thread_t thread)
1186 {
1187 spl_t s;
1188
1189 assert(thread == current_thread());
1190
1191 s = splsched();
1192 thread_lock(thread);
1193
1194 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1195 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1196 }
1197
1198 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1199 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1200 }
1201
1202 /* At this point, the various demotions should be inactive */
1203 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1204 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1205 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1206
1207 /* Reset thread back to task-default basepri and mode */
1208 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1209
1210 sched_set_thread_mode(thread, newmode);
1211
1212 thread->importance = 0;
1213
1214 /* Prevent further changes to thread base priority or mode */
1215 thread->policy_reset = 1;
1216
1217 sched_set_thread_base_priority(thread, thread->task_priority);
1218
1219 thread_unlock(thread);
1220 splx(s);
1221 }
1222
/*
 * MIG-visible getter for thread policy flavors.
 *
 * On entry *get_default selects default vs. current values; several
 * flavors also set it to TRUE on the way out when the thread has no
 * meaningful current value (e.g. asking for time-constraint info on a
 * non-realtime thread), and then report the defaults.
 *
 * Returns KERN_INVALID_ARGUMENT for a null thread, an unknown flavor,
 * or an undersized count; KERN_TERMINATED for an inactive thread.
 */
kern_return_t
thread_policy_get(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t *count,
	boolean_t *get_default)
{
	kern_return_t result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/*
			 * A demoted thread reports its saved (pre-demotion) mode;
			 * a realtime thread falls back to reporting the default.
			 */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/*
		 * The WITH_PRIORITY flavor is a superset of the plain one,
		 * so a single struct pointer serves both; only the priority
		 * field is conditional.
		 */
		thread_time_constraint_with_priority_policy_t info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/*
			 * Export the raw policy bitfields; the double cast
			 * through void* avoids strict-aliasing complaints.
			 */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* Relative priority is stored inverted; undo that for callers */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1496
1497 void
thread_policy_create(thread_t thread)1498 thread_policy_create(thread_t thread)
1499 {
1500 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1501 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1502 thread_tid(thread), theffective_0(thread),
1503 theffective_1(thread), thread->base_pri, 0);
1504
1505 /* We pass a pend token but ignore it */
1506 struct task_pend_token pend_token = {};
1507
1508 thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1509
1510 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1511 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1512 thread_tid(thread), theffective_0(thread),
1513 theffective_1(thread), thread->base_pri, 0);
1514 }
1515
1516 static void
thread_policy_update_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1517 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1518 {
1519 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1520 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1521 thread_tid(thread), theffective_0(thread),
1522 theffective_1(thread), thread->base_pri, 0);
1523
1524 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1525
1526 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1527 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1528 thread_tid(thread), theffective_0(thread),
1529 theffective_1(thread), thread->base_pri, 0);
1530 }
1531
1532
1533
1534 /*
1535 * One thread state update function TO RULE THEM ALL
1536 *
1537 * This function updates the thread effective policy fields
1538 * and pushes the results to the relevant subsystems.
1539 *
1540 * Returns TRUE if a pended action needs to be run.
1541 *
1542 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1543 */
1544 static void
thread_policy_update_internal_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1545 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1546 task_pend_token_t pend_token)
1547 {
1548 /*
1549 * Step 1:
1550 * Gather requested policy and effective task state
1551 */
1552
1553 struct thread_requested_policy requested = thread->requested_policy;
1554 struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;
1555
1556 /*
1557 * Step 2:
1558 * Calculate new effective policies from requested policy, task and thread state
1559 * Rules:
1560 * Don't change requested, it won't take effect
1561 */
1562
1563 struct thread_effective_policy next = {};
1564
1565 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1566
1567 uint32_t next_qos = requested.thrp_qos;
1568
1569 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1570 next_qos = MAX(requested.thrp_qos_override, next_qos);
1571 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1572 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1573 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1574 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1575 }
1576
1577 if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1578 requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1579 /*
1580 * This thread is turnstile-boosted higher than the adaptive clamp
1581 * by a synchronous waiter. Allow that to override the adaptive
1582 * clamp temporarily for this thread only.
1583 */
1584 next.thep_promote_above_task = true;
1585 next_qos = requested.thrp_qos_promote;
1586 }
1587
1588 next.thep_qos = next_qos;
1589
1590 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1591 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1592 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1593 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1594 } else {
1595 next.thep_qos = task_effective.tep_qos_clamp;
1596 }
1597 }
1598
1599 /*
1600 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1601 * This allows QoS promotions to work properly even after the process is unclamped.
1602 */
1603 next.thep_qos_promote = next.thep_qos;
1604
1605 /* The ceiling only applies to threads that are in the QoS world */
1606 /* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1607 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1608 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1609 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1610 }
1611
1612 /*
1613 * The QoS relative priority is only applicable when the original programmer's
1614 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1615 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1616 * since otherwise it would be lower than unclamped threads. Similarly, in the
1617 * presence of boosting, the programmer doesn't know what other actors
1618 * are boosting the thread.
1619 */
1620 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1621 (requested.thrp_qos == next.thep_qos) &&
1622 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1623 next.thep_qos_relprio = requested.thrp_qos_relprio;
1624 } else {
1625 next.thep_qos_relprio = 0;
1626 }
1627
1628 /* Calculate DARWIN_BG */
1629 bool wants_darwinbg = false;
1630 bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */
1631
1632 if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1633 wants_darwinbg = true;
1634 }
1635
1636 /*
1637 * If DARWIN_BG has been requested at either level, it's engaged.
1638 * darwinbg threads always create bg sockets,
1639 * but only some types of darwinbg change the sockets
1640 * after they're created
1641 */
1642 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1643 wants_all_sockets_bg = wants_darwinbg = true;
1644 }
1645
1646 if (requested.thrp_pidbind_bg) {
1647 wants_all_sockets_bg = wants_darwinbg = true;
1648 }
1649
1650 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1651 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1652 wants_darwinbg = true;
1653 }
1654
1655 /* Calculate side effects of DARWIN_BG */
1656
1657 if (wants_darwinbg) {
1658 next.thep_darwinbg = 1;
1659 }
1660
1661 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1662 next.thep_new_sockets_bg = 1;
1663 }
1664
1665 /* Don't use task_effective.tep_all_sockets_bg here */
1666 if (wants_all_sockets_bg) {
1667 next.thep_all_sockets_bg = 1;
1668 }
1669
1670 /* darwinbg implies background QOS (or lower) */
1671 if (next.thep_darwinbg &&
1672 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1673 next.thep_qos = THREAD_QOS_BACKGROUND;
1674 next.thep_qos_relprio = 0;
1675 }
1676
1677 /* Calculate IO policy */
1678
1679 int iopol = THROTTLE_LEVEL_TIER0;
1680
1681 /* Factor in the task's IO policy */
1682 if (next.thep_darwinbg) {
1683 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1684 }
1685
1686 if (!next.thep_promote_above_task) {
1687 iopol = MAX(iopol, task_effective.tep_io_tier);
1688 }
1689
1690 /* Look up the associated IO tier value for the QoS class */
1691 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1692
1693 iopol = MAX(iopol, requested.thrp_int_iotier);
1694 iopol = MAX(iopol, requested.thrp_ext_iotier);
1695
1696 next.thep_io_tier = iopol;
1697
1698 /*
1699 * If a QoS override is causing IO to go into a lower tier, we also set
1700 * the passive bit so that a thread doesn't end up stuck in its own throttle
1701 * window when the override goes away.
1702 */
1703
1704 int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1705 int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1706 bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1707
1708 /* Calculate Passive IO policy */
1709 if (requested.thrp_ext_iopassive ||
1710 requested.thrp_int_iopassive ||
1711 qos_io_override_active ||
1712 task_effective.tep_io_passive) {
1713 next.thep_io_passive = 1;
1714 }
1715
1716 /* Calculate timer QOS */
1717 uint32_t latency_qos = requested.thrp_latency_qos;
1718
1719 if (!next.thep_promote_above_task) {
1720 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1721 }
1722
1723 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1724
1725 next.thep_latency_qos = latency_qos;
1726
1727 /* Calculate throughput QOS */
1728 uint32_t through_qos = requested.thrp_through_qos;
1729
1730 if (!next.thep_promote_above_task) {
1731 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1732 }
1733
1734 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1735
1736 next.thep_through_qos = through_qos;
1737
1738 if (task_effective.tep_terminated || requested.thrp_terminated) {
1739 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1740 next.thep_terminated = 1;
1741 next.thep_darwinbg = 0;
1742 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1743 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1744 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1745 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1746 }
1747
1748 /*
1749 * Step 3:
1750 * Swap out old policy for new policy
1751 */
1752
1753 struct thread_effective_policy prev = thread->effective_policy;
1754
1755 thread_update_qos_cpu_time_locked(thread);
1756
1757 /* This is the point where the new values become visible to other threads */
1758 thread->effective_policy = next;
1759
1760 /*
1761 * Step 4:
1762 * Pend updates that can't be done while holding the thread lock
1763 */
1764
1765 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1766 pend_token->tpt_update_sockets = 1;
1767 }
1768
1769 /* TODO: Doesn't this only need to be done if the throttle went up? */
1770 if (prev.thep_io_tier != next.thep_io_tier) {
1771 pend_token->tpt_update_throttle = 1;
1772 }
1773
1774 /*
1775 * Check for the attributes that sfi_thread_classify() consults,
1776 * and trigger SFI re-evaluation.
1777 */
1778 if (prev.thep_qos != next.thep_qos ||
1779 prev.thep_darwinbg != next.thep_darwinbg) {
1780 pend_token->tpt_update_thread_sfi = 1;
1781 }
1782
1783 integer_t old_base_pri = thread->base_pri;
1784
1785 /*
1786 * Step 5:
1787 * Update other subsystems as necessary if something has changed
1788 */
1789
1790 /* Check for the attributes that thread_recompute_priority() consults */
1791 if (prev.thep_qos != next.thep_qos ||
1792 prev.thep_qos_relprio != next.thep_qos_relprio ||
1793 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1794 prev.thep_promote_above_task != next.thep_promote_above_task ||
1795 prev.thep_terminated != next.thep_terminated ||
1796 pend_token->tpt_force_recompute_pri == 1 ||
1797 recompute_priority) {
1798 thread_recompute_priority(thread);
1799 }
1800
1801 /*
1802 * Check if the thread is waiting on a turnstile and needs priority propagation.
1803 */
1804 if (pend_token->tpt_update_turnstile &&
1805 ((old_base_pri == thread->base_pri) ||
1806 !thread_get_waiting_turnstile(thread))) {
1807 /*
1808 * Reset update turnstile pend token since either
1809 * the thread priority did not change or thread is
1810 * not blocked on a turnstile.
1811 */
1812 pend_token->tpt_update_turnstile = 0;
1813 }
1814 }
1815
1816
1817 /*
1818 * Initiate a thread policy state transition on a thread with its TID
1819 * Useful if you cannot guarantee the thread won't get terminated
1820 * Precondition: No locks are held
1821 * Will take task lock - using the non-tid variant is faster
1822 * if you already have a thread ref.
1823 */
1824 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1825 proc_set_thread_policy_with_tid(task_t task,
1826 uint64_t tid,
1827 int category,
1828 int flavor,
1829 int value)
1830 {
1831 /* takes task lock, returns ref'ed thread or NULL */
1832 thread_t thread = task_findtid(task, tid);
1833
1834 if (thread == THREAD_NULL) {
1835 return;
1836 }
1837
1838 proc_set_thread_policy(thread, category, flavor, value);
1839
1840 thread_deallocate(thread);
1841 }
1842
1843 /*
1844 * Initiate a thread policy transition on a thread
1845 * This path supports networking transitions (i.e. darwinbg transitions)
1846 * Precondition: No locks are held
1847 */
1848 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1849 proc_set_thread_policy(thread_t thread,
1850 int category,
1851 int flavor,
1852 int value)
1853 {
1854 struct task_pend_token pend_token = {};
1855
1856 thread_mtx_lock(thread);
1857
1858 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1859
1860 thread_mtx_unlock(thread);
1861
1862 thread_policy_update_complete_unlocked(thread, &pend_token);
1863 }
1864
1865 /*
1866 * Do the things that can't be done while holding a thread mutex.
1867 * These are set up to call back into thread policy to get the latest value,
1868 * so they don't have to be synchronized with the update.
1869 * The only required semantic is 'call this sometime after updating effective policy'
1870 *
1871 * Precondition: Thread mutex is not held
1872 *
1873 * This may be called with the task lock held, but in that case it won't be
1874 * called with tpt_update_sockets set.
1875 */
1876 void
thread_policy_update_complete_unlocked(thread_t thread,task_pend_token_t pend_token)1877 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1878 {
1879 #ifdef MACH_BSD
1880 if (pend_token->tpt_update_sockets) {
1881 proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
1882 }
1883 #endif /* MACH_BSD */
1884
1885 if (pend_token->tpt_update_throttle) {
1886 rethrottle_thread(get_bsdthread_info(thread));
1887 }
1888
1889 if (pend_token->tpt_update_thread_sfi) {
1890 sfi_reevaluate(thread);
1891 }
1892
1893 if (pend_token->tpt_update_turnstile) {
1894 turnstile_update_thread_priority_chain(thread);
1895 }
1896 }
1897
1898 /*
1899 * Set and update thread policy
1900 * Thread mutex might be held
1901 */
1902 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1903 proc_set_thread_policy_locked(thread_t thread,
1904 int category,
1905 int flavor,
1906 int value,
1907 int value2,
1908 task_pend_token_t pend_token)
1909 {
1910 spl_t s = splsched();
1911 thread_lock(thread);
1912
1913 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1914
1915 thread_unlock(thread);
1916 splx(s);
1917 }
1918
1919 /*
1920 * Set and update thread policy
1921 * Thread spinlock is held
1922 */
1923 static void
proc_set_thread_policy_spinlocked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1924 proc_set_thread_policy_spinlocked(thread_t thread,
1925 int category,
1926 int flavor,
1927 int value,
1928 int value2,
1929 task_pend_token_t pend_token)
1930 {
1931 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1932 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1933 thread_tid(thread), threquested_0(thread),
1934 threquested_1(thread), value, 0);
1935
1936 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1937
1938 thread_policy_update_spinlocked(thread, false, pend_token);
1939
1940 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1941 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1942 thread_tid(thread), threquested_0(thread),
1943 threquested_1(thread), tpending(pend_token), 0);
1944 }
1945
1946 /*
1947 * Set the requested state for a specific flavor to a specific value.
1948 */
1949 static void
thread_set_requested_policy_spinlocked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1950 thread_set_requested_policy_spinlocked(thread_t thread,
1951 int category,
1952 int flavor,
1953 int value,
1954 int value2,
1955 task_pend_token_t pend_token)
1956 {
1957 int tier, passive;
1958
1959 struct thread_requested_policy requested = thread->requested_policy;
1960
1961 switch (flavor) {
1962 /* Category: EXTERNAL and INTERNAL, thread and task */
1963
1964 case TASK_POLICY_DARWIN_BG:
1965 if (category == TASK_POLICY_EXTERNAL) {
1966 requested.thrp_ext_darwinbg = value;
1967 } else {
1968 requested.thrp_int_darwinbg = value;
1969 }
1970 break;
1971
1972 case TASK_POLICY_IOPOL:
1973 proc_iopol_to_tier(value, &tier, &passive);
1974 if (category == TASK_POLICY_EXTERNAL) {
1975 requested.thrp_ext_iotier = tier;
1976 requested.thrp_ext_iopassive = passive;
1977 } else {
1978 requested.thrp_int_iotier = tier;
1979 requested.thrp_int_iopassive = passive;
1980 }
1981 break;
1982
1983 case TASK_POLICY_IO:
1984 if (category == TASK_POLICY_EXTERNAL) {
1985 requested.thrp_ext_iotier = value;
1986 } else {
1987 requested.thrp_int_iotier = value;
1988 }
1989 break;
1990
1991 case TASK_POLICY_PASSIVE_IO:
1992 if (category == TASK_POLICY_EXTERNAL) {
1993 requested.thrp_ext_iopassive = value;
1994 } else {
1995 requested.thrp_int_iopassive = value;
1996 }
1997 break;
1998
1999 /* Category: ATTRIBUTE, thread only */
2000
2001 case TASK_POLICY_PIDBIND_BG:
2002 assert(category == TASK_POLICY_ATTRIBUTE);
2003 requested.thrp_pidbind_bg = value;
2004 break;
2005
2006 case TASK_POLICY_LATENCY_QOS:
2007 assert(category == TASK_POLICY_ATTRIBUTE);
2008 requested.thrp_latency_qos = value;
2009 break;
2010
2011 case TASK_POLICY_THROUGH_QOS:
2012 assert(category == TASK_POLICY_ATTRIBUTE);
2013 requested.thrp_through_qos = value;
2014 break;
2015
2016 case TASK_POLICY_QOS_OVERRIDE:
2017 assert(category == TASK_POLICY_ATTRIBUTE);
2018 requested.thrp_qos_override = value;
2019 pend_token->tpt_update_turnstile = 1;
2020 break;
2021
2022 case TASK_POLICY_QOS_AND_RELPRIO:
2023 assert(category == TASK_POLICY_ATTRIBUTE);
2024 requested.thrp_qos = value;
2025 requested.thrp_qos_relprio = value2;
2026 pend_token->tpt_update_turnstile = 1;
2027 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
2028 break;
2029
2030 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2031 assert(category == TASK_POLICY_ATTRIBUTE);
2032 requested.thrp_qos_workq_override = value;
2033 pend_token->tpt_update_turnstile = 1;
2034 break;
2035
2036 case TASK_POLICY_QOS_PROMOTE:
2037 assert(category == TASK_POLICY_ATTRIBUTE);
2038 requested.thrp_qos_promote = value;
2039 break;
2040
2041 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2042 assert(category == TASK_POLICY_ATTRIBUTE);
2043 requested.thrp_qos_kevent_override = value;
2044 pend_token->tpt_update_turnstile = 1;
2045 break;
2046
2047 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2048 assert(category == TASK_POLICY_ATTRIBUTE);
2049 requested.thrp_qos_wlsvc_override = value;
2050 pend_token->tpt_update_turnstile = 1;
2051 break;
2052
2053 case TASK_POLICY_TERMINATED:
2054 assert(category == TASK_POLICY_ATTRIBUTE);
2055 requested.thrp_terminated = value;
2056 break;
2057
2058 default:
2059 panic("unknown task policy: %d %d %d", category, flavor, value);
2060 break;
2061 }
2062
2063 thread->requested_policy = requested;
2064 }
2065
2066 /*
2067 * Gets what you set. Effective values may be different.
2068 * Precondition: No locks are held
2069 */
2070 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2071 proc_get_thread_policy(thread_t thread,
2072 int category,
2073 int flavor)
2074 {
2075 int value = 0;
2076 thread_mtx_lock(thread);
2077 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2078 thread_mtx_unlock(thread);
2079 return value;
2080 }
2081
2082 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2083 proc_get_thread_policy_locked(thread_t thread,
2084 int category,
2085 int flavor,
2086 int* value2)
2087 {
2088 int value = 0;
2089
2090 spl_t s = splsched();
2091 thread_lock(thread);
2092
2093 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2094
2095 thread_unlock(thread);
2096 splx(s);
2097
2098 return value;
2099 }
2100
2101 /*
2102 * Gets what you set. Effective values may be different.
2103 */
2104 static int
thread_get_requested_policy_spinlocked(thread_t thread,int category,int flavor,int * value2)2105 thread_get_requested_policy_spinlocked(thread_t thread,
2106 int category,
2107 int flavor,
2108 int* value2)
2109 {
2110 int value = 0;
2111
2112 struct thread_requested_policy requested = thread->requested_policy;
2113
2114 switch (flavor) {
2115 case TASK_POLICY_DARWIN_BG:
2116 if (category == TASK_POLICY_EXTERNAL) {
2117 value = requested.thrp_ext_darwinbg;
2118 } else {
2119 value = requested.thrp_int_darwinbg;
2120 }
2121 break;
2122 case TASK_POLICY_IOPOL:
2123 if (category == TASK_POLICY_EXTERNAL) {
2124 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2125 requested.thrp_ext_iopassive);
2126 } else {
2127 value = proc_tier_to_iopol(requested.thrp_int_iotier,
2128 requested.thrp_int_iopassive);
2129 }
2130 break;
2131 case TASK_POLICY_IO:
2132 if (category == TASK_POLICY_EXTERNAL) {
2133 value = requested.thrp_ext_iotier;
2134 } else {
2135 value = requested.thrp_int_iotier;
2136 }
2137 break;
2138 case TASK_POLICY_PASSIVE_IO:
2139 if (category == TASK_POLICY_EXTERNAL) {
2140 value = requested.thrp_ext_iopassive;
2141 } else {
2142 value = requested.thrp_int_iopassive;
2143 }
2144 break;
2145 case TASK_POLICY_QOS:
2146 assert(category == TASK_POLICY_ATTRIBUTE);
2147 value = requested.thrp_qos;
2148 break;
2149 case TASK_POLICY_QOS_OVERRIDE:
2150 assert(category == TASK_POLICY_ATTRIBUTE);
2151 value = requested.thrp_qos_override;
2152 break;
2153 case TASK_POLICY_LATENCY_QOS:
2154 assert(category == TASK_POLICY_ATTRIBUTE);
2155 value = requested.thrp_latency_qos;
2156 break;
2157 case TASK_POLICY_THROUGH_QOS:
2158 assert(category == TASK_POLICY_ATTRIBUTE);
2159 value = requested.thrp_through_qos;
2160 break;
2161 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2162 assert(category == TASK_POLICY_ATTRIBUTE);
2163 value = requested.thrp_qos_workq_override;
2164 break;
2165 case TASK_POLICY_QOS_AND_RELPRIO:
2166 assert(category == TASK_POLICY_ATTRIBUTE);
2167 assert(value2 != NULL);
2168 value = requested.thrp_qos;
2169 *value2 = requested.thrp_qos_relprio;
2170 break;
2171 case TASK_POLICY_QOS_PROMOTE:
2172 assert(category == TASK_POLICY_ATTRIBUTE);
2173 value = requested.thrp_qos_promote;
2174 break;
2175 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2176 assert(category == TASK_POLICY_ATTRIBUTE);
2177 value = requested.thrp_qos_kevent_override;
2178 break;
2179 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2180 assert(category == TASK_POLICY_ATTRIBUTE);
2181 value = requested.thrp_qos_wlsvc_override;
2182 break;
2183 case TASK_POLICY_TERMINATED:
2184 assert(category == TASK_POLICY_ATTRIBUTE);
2185 value = requested.thrp_terminated;
2186 break;
2187
2188 default:
2189 panic("unknown policy_flavor %d", flavor);
2190 break;
2191 }
2192
2193 return value;
2194 }
2195
2196 /*
2197 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2198 *
2199 * NOTE: This accessor does not take the task or thread lock.
2200 * Notifications of state updates need to be externally synchronized with state queries.
2201 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2202 * within the context of a timer interrupt.
2203 *
2204 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2205 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2206 * I don't think that cost is worth not having the right answer.
2207 */
2208 int
proc_get_effective_thread_policy(thread_t thread,int flavor)2209 proc_get_effective_thread_policy(thread_t thread,
2210 int flavor)
2211 {
2212 int value = 0;
2213
2214 switch (flavor) {
2215 case TASK_POLICY_DARWIN_BG:
2216 /*
2217 * This call is used within the timer layer, as well as
2218 * prioritizing requests to the graphics system.
2219 * It also informs SFI and originator-bg-state.
2220 * Returns 1 for background mode, 0 for normal mode
2221 */
2222
2223 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2224 break;
2225 case TASK_POLICY_IO:
2226 /*
2227 * The I/O system calls here to find out what throttling tier to apply to an operation.
2228 * Returns THROTTLE_LEVEL_* values
2229 */
2230 value = thread->effective_policy.thep_io_tier;
2231 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2232 value = MIN(value, thread->iotier_override);
2233 }
2234 break;
2235 case TASK_POLICY_PASSIVE_IO:
2236 /*
2237 * The I/O system calls here to find out whether an operation should be passive.
2238 * (i.e. not cause operations with lower throttle tiers to be throttled)
2239 * Returns 1 for passive mode, 0 for normal mode
2240 *
2241 * If an override is causing IO to go into a lower tier, we also set
2242 * the passive bit so that a thread doesn't end up stuck in its own throttle
2243 * window when the override goes away.
2244 */
2245 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2246 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2247 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2248 value = 1;
2249 }
2250 break;
2251 case TASK_POLICY_ALL_SOCKETS_BG:
2252 /*
2253 * do_background_socket() calls this to determine whether
2254 * it should change the thread's sockets
2255 * Returns 1 for background mode, 0 for normal mode
2256 * This consults both thread and task so un-DBGing a thread while the task is BG
2257 * doesn't get you out of the network throttle.
2258 */
2259 value = (thread->effective_policy.thep_all_sockets_bg ||
2260 get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2261 break;
2262 case TASK_POLICY_NEW_SOCKETS_BG:
2263 /*
2264 * socreate() calls this to determine if it should mark a new socket as background
2265 * Returns 1 for background mode, 0 for normal mode
2266 */
2267 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2268 break;
2269 case TASK_POLICY_LATENCY_QOS:
2270 /*
2271 * timer arming calls into here to find out the timer coalescing level
2272 * Returns a latency QoS tier (0-6)
2273 */
2274 value = thread->effective_policy.thep_latency_qos;
2275 break;
2276 case TASK_POLICY_THROUGH_QOS:
2277 /*
2278 * This value is passed into the urgency callout from the scheduler
2279 * to the performance management subsystem.
2280 *
2281 * Returns a throughput QoS tier (0-6)
2282 */
2283 value = thread->effective_policy.thep_through_qos;
2284 break;
2285 case TASK_POLICY_QOS:
2286 /*
2287 * This is communicated to the performance management layer and SFI.
2288 *
2289 * Returns a QoS policy tier
2290 */
2291 value = thread->effective_policy.thep_qos;
2292 break;
2293 default:
2294 panic("unknown thread policy flavor %d", flavor);
2295 break;
2296 }
2297
2298 return value;
2299 }
2300
2301
2302 /*
2303 * (integer_t) casts limit the number of bits we can fit here
2304 * this interface is deprecated and replaced by the _EXT struct ?
2305 */
2306 static void
proc_get_thread_policy_bitfield(thread_t thread,thread_policy_state_t info)2307 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2308 {
2309 uint64_t bits = 0;
2310 struct thread_requested_policy requested = thread->requested_policy;
2311
2312 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2313 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2314 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2315 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2316 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2317 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2318
2319 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2320 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2321
2322 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2323
2324 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2325 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2326
2327 info->requested = (integer_t) bits;
2328 bits = 0;
2329
2330 struct thread_effective_policy effective = thread->effective_policy;
2331
2332 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2333
2334 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2335 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2336 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2337 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2338
2339 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2340
2341 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2342 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2343
2344 info->effective = (integer_t)bits;
2345 bits = 0;
2346
2347 info->pending = 0;
2348 }
2349
2350 /*
2351 * Sneakily trace either the task and thread requested
2352 * or just the thread requested, depending on if we have enough room.
2353 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2354 *
2355 * LP32 LP64
2356 * threquested_0(thread) thread[0] task[0]
2357 * threquested_1(thread) thread[1] thread[0]
2358 *
2359 */
2360
uintptr_t
threquested_0(thread_t thread)
{
	/* The policy struct must be exactly one 64-bit word for this punning */
	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

	/* Expose the raw policy word for tracing (first half on LP32) */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

	return raw[0];
}
2370
uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
	/* On LP64, threquested_0 already carried the whole thread word,
	 * so this slot traces the task's requested policy instead */
	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
#else
	/* On LP32, return the second half of the thread's 64-bit policy word */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}
2381
uintptr_t
theffective_0(thread_t thread)
{
	/* The policy struct must be exactly one 64-bit word for this punning */
	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

	/* Expose the raw effective-policy word for tracing (first half on LP32) */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[0];
}
2390
uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
	/* On LP64, theffective_0 already carried the whole thread word,
	 * so this slot traces the task's effective policy instead */
	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
#else
	/* On LP32, return the second half of the thread's 64-bit policy word */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
2401
2402
2403 /*
2404 * Set an override on the thread which is consulted with a
2405 * higher priority than the task/thread policy. This should
2406 * only be set for temporary grants until the thread
2407 * returns to the userspace boundary
2408 *
2409 * We use atomic operations to swap in the override, with
2410 * the assumption that the thread itself can
2411 * read the override and clear it on return to userspace.
2412 *
2413 * No locking is performed, since it is acceptable to see
2414 * a stale override for one loop through throttle_lowpri_io().
2415 * However a thread reference must be held on the thread.
2416 */
2417
void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE) {
			/* Lower numeric values are more aggressive tiers */
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* Retry if another thread raced us and changed the override */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2444
2445 /*
2446 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2447 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2448 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2449 * priority thread. In these cases, we attempt to propagate the priority token, as long
2450 * as the subsystem informs us of the relationships between the threads. The userspace
2451 * synchronization subsystem should maintain the information of owner->resource and
2452 * resource->waiters itself.
2453 */
2454
2455 /*
2456 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2457 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2458 * to be handled specially in the future, but for now it's fine to slam
2459 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2460 */
2461 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2462 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2463 {
2464 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2465 /* Map all input resource/type to a single one */
2466 *resource = USER_ADDR_NULL;
2467 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2468 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2469 /* no transform */
2470 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2471 /* Map all mutex overrides to a single one, to avoid memory overhead */
2472 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2473 *resource = USER_ADDR_NULL;
2474 }
2475 }
2476 }
2477
2478 /* This helper routine finds an existing override if known. Locking should be done by caller */
2479 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2480 find_qos_override(thread_t thread,
2481 user_addr_t resource,
2482 int resource_type)
2483 {
2484 struct thread_qos_override *override;
2485
2486 override = thread->overrides;
2487 while (override) {
2488 if (override->override_resource == resource &&
2489 override->override_resource_type == resource_type) {
2490 return override;
2491 }
2492
2493 override = override->override_next;
2494 }
2495
2496 return NULL;
2497 }
2498
/*
 * Walk the thread's override list and decrement (or zero, when 'reset')
 * the contended-resource count of each override matching resource and
 * resource_type; the WILDCARD sentinels match everything.  Overrides
 * whose count reaches zero are unlinked and chained onto
 * *free_override_list so the caller can zfree them after dropping the
 * thread mutex.  Caller must hold the thread mutex.
 */
static void
find_and_decrement_qos_override(thread_t thread,
    user_addr_t resource,
    int resource_type,
    boolean_t reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		/* Capture the successor: 'override' may be unlinked below */
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink from the thread's override list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				/* A non-wildcard resource matches at most one entry */
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2545
2546 /* This helper recalculates the current requested override using the policy selected at boot */
2547 static int
calculate_requested_qos_override(thread_t thread)2548 calculate_requested_qos_override(thread_t thread)
2549 {
2550 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2551 return THREAD_QOS_UNSPECIFIED;
2552 }
2553
2554 /* iterate over all overrides and calculate MAX */
2555 struct thread_qos_override *override;
2556 int qos_override = THREAD_QOS_UNSPECIFIED;
2557
2558 override = thread->overrides;
2559 while (override) {
2560 qos_override = MAX(qos_override, override->override_qos);
2561 override = override->override_next;
2562 }
2563
2564 return qos_override;
2565 }
2566
2567 /*
2568 * Returns:
2569 * - 0 on success
2570 * - EINVAL if some invalid input was passed
2571 */
static int
proc_thread_qos_add_override_internal(thread_t thread,
    int override_qos,
    boolean_t first_override_for_resource,
    user_addr_t resource,
    int resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Won the race: link the freshly allocated override at the head */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		/* An override's QoS can only go up while it is held */
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	/* Apply any pending policy side effects outside the mutex */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	if (override_new) {
		/* Lost the allocation race: free the unused object */
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2664
2665 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2666 proc_thread_qos_add_override(task_t task,
2667 thread_t thread,
2668 uint64_t tid,
2669 int override_qos,
2670 boolean_t first_override_for_resource,
2671 user_addr_t resource,
2672 int resource_type)
2673 {
2674 boolean_t has_thread_reference = FALSE;
2675 int rc = 0;
2676
2677 if (thread == THREAD_NULL) {
2678 thread = task_findtid(task, tid);
2679 /* returns referenced thread */
2680
2681 if (thread == THREAD_NULL) {
2682 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2683 tid, 0, 0xdead, 0, 0);
2684 return ESRCH;
2685 }
2686 has_thread_reference = TRUE;
2687 } else {
2688 assert(get_threadtask(thread) == task);
2689 }
2690 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2691 first_override_for_resource, resource, resource_type);
2692 if (has_thread_reference) {
2693 thread_deallocate(thread);
2694 }
2695
2696 return rc;
2697 }
2698
/*
 * Drop (or, when 'reset', clear) the override(s) on 'thread' matching
 * resource/resource_type, recompute the combined requested override, and
 * apply any resulting policy change.  Freed override objects are zfree'd
 * only after all locks are dropped.
 */
static void
proc_thread_qos_remove_override_internal(thread_t thread,
    user_addr_t resource,
    int resource_type,
    boolean_t reset)
{
	struct task_pend_token pend_token = {};

	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	/* Unlinks expired overrides onto deferred_free_override_list */
	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	/* Apply any pending policy side effects outside all locks */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the overrides that were unlinked above */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2762
2763 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2764 proc_thread_qos_remove_override(task_t task,
2765 thread_t thread,
2766 uint64_t tid,
2767 user_addr_t resource,
2768 int resource_type)
2769 {
2770 boolean_t has_thread_reference = FALSE;
2771
2772 if (thread == THREAD_NULL) {
2773 thread = task_findtid(task, tid);
2774 /* returns referenced thread */
2775
2776 if (thread == THREAD_NULL) {
2777 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2778 tid, 0, 0xdead, 0, 0);
2779 return ESRCH;
2780 }
2781 has_thread_reference = TRUE;
2782 } else {
2783 assert(task == get_threadtask(thread));
2784 }
2785
2786 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2787
2788 if (has_thread_reference) {
2789 thread_deallocate(thread);
2790 }
2791
2792 return 0;
2793 }
2794
2795 /* Deallocate before thread termination */
2796 void
proc_thread_qos_deallocate(thread_t thread)2797 proc_thread_qos_deallocate(thread_t thread)
2798 {
2799 /* This thread must have no more IPC overrides. */
2800 assert(thread->kevent_overrides == 0);
2801 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2802 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2803
2804 /*
2805 * Clear out any lingering override objects.
2806 */
2807 struct thread_qos_override *override;
2808
2809 thread_mtx_lock(thread);
2810 override = thread->overrides;
2811 thread->overrides = NULL;
2812 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2813 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2814 thread_mtx_unlock(thread);
2815
2816 while (override) {
2817 struct thread_qos_override *override_next = override->override_next;
2818
2819 zfree(thread_qos_override_zone, override);
2820 override = override_next;
2821 }
2822 }
2823
2824 /*
2825 * Set up the primordial thread's QoS
2826 */
void
task_set_main_thread_qos(task_t task, thread_t thread)
{
	struct task_pend_token pend_token = {};

	assert(get_threadtask(thread) == task);

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    thread->requested_policy.thrp_qos, 0);

	/* Derive the main thread's initial QoS from the task's policy */
	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    primordial_qos, 0, &pend_token);

	thread_mtx_unlock(thread);

	/* Apply any pending policy side effects outside the mutex */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    primordial_qos, 0);
}
2855
2856 /*
2857 * KPI for pthread kext
2858 *
2859 * Return a good guess at what the initial manager QoS will be
2860 * Dispatch can override this in userspace if it so chooses
2861 */
2862 thread_qos_t
task_get_default_manager_qos(task_t task)2863 task_get_default_manager_qos(task_t task)
2864 {
2865 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2866
2867 if (primordial_qos == THREAD_QOS_LEGACY) {
2868 primordial_qos = THREAD_QOS_USER_INITIATED;
2869 }
2870
2871 return primordial_qos;
2872 }
2873
2874 /*
2875 * Check if the kernel promotion on thread has changed
2876 * and apply it.
2877 *
2878 * thread locked on entry and exit
2879 */
boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	/* Current kernel-promotion priority as dictated by the turnstile inheritance chain */
	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);

	/*
	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
	 * and propagates the priority through the chain with the same cap, because as of now it does
	 * not differentiate on the kernel primitive.
	 *
	 * If this assumption changes with the adoption of a kernel primitive that does not
	 * cap the priority when adding/propagating,
	 * then here is the place to put the generic cap for all kernel primitives
	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
	 */
	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);

	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    kern_promotion_schedpri,
		    thread->kern_promotion_schedpri);

		needs_update = TRUE;
		/* Record the new promotion and fold it into the scheduled priority */
		thread->kern_promotion_schedpri = kern_promotion_schedpri;
		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	return needs_update;
}
2913
2914 /*
2915 * Check if the user promotion on thread has changed
2916 * and apply it.
2917 *
2918 * thread locked on entry, might drop the thread lock
2919 * and reacquire it.
2920 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* User promotions are capped at MAXPRI_USER regardless of the inheritor chain */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* A promotion at or below the throttled band maps to no QoS promotion */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	/* Only report an update when a waiting turnstile could observe the base pri change */
	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/* Drop the thread lock to flush pending policy side effects, then reacquire */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
2973
2974 /*
2975 * Convert the thread user promotion base pri to qos for threads in qos world.
2976 * For priority above UI qos, the qos would be set to UI.
2977 */
2978 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)2979 thread_user_promotion_qos_for_pri(int priority)
2980 {
2981 thread_qos_t qos;
2982 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2983 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2984 return qos;
2985 }
2986 }
2987 return THREAD_QOS_MAINTENANCE;
2988 }
2989
2990 /*
2991 * Set the thread's QoS Kevent override
2992 * Owned by the Kevent subsystem
2993 *
2994 * May be called with spinlocks held, but not spinlocks
2995 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2996 *
2997 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2999 * Before the thread is deallocated, there must be 0 remaining overrides.
3000 */
static void
thread_kevent_override(thread_t thread,
    uint32_t qos_override,
    boolean_t is_new_override)
{
	struct task_pend_token pend_token = {};
	boolean_t needs_update;

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;

	/* Callers must pass a concrete, in-range QoS value */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	if (is_new_override) {
		if (thread->kevent_overrides++ == 0) {
			/* This add is the first override for this thread */
			assert(old_override == THREAD_QOS_UNSPECIFIED);
		} else {
			/* There are already other overrides in effect for this thread */
			assert(old_override > THREAD_QOS_UNSPECIFIED);
		}
	} else {
		/* There must be at least one override (the previous add call) in effect */
		assert(thread->kevent_overrides > 0);
		assert(old_override > THREAD_QOS_UNSPECIFIED);
	}

	/*
	 * We can't allow lowering if there are several IPC overrides because
	 * the caller can't possibly know the whole truth
	 */
	if (thread->kevent_overrides == 1) {
		/* Sole override: any change (raise or lower) is applied */
		needs_update = qos_override != old_override;
	} else {
		/* Multiple overrides: only allow raising the saturated value */
		needs_update = qos_override > old_override;
	}

	if (needs_update) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
		    qos_override, 0, &pend_token);
		assert(pend_token.tpt_update_sockets == 0);
	}

	thread_unlock(thread);
	splx(s);

	/* Apply any pending policy side effects now that the locks are dropped */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3053
/* Add a new kevent QoS override; must be balanced by thread_drop_kevent_override */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3059
/* Update the QoS value of an existing kevent override (between add and drop) */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3065
/* Drop one kevent override; clears the saturated value when it is the last one */
void
thread_drop_kevent_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	/* A drop must be balancing a previous add */
	assert(thread->kevent_overrides > 0);

	if (--thread->kevent_overrides == 0) {
		/*
		 * There are no more overrides for this thread, so we should
		 * clear out the saturated override value
		 */

		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
		    0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	/* Apply any pending policy side effects now that the locks are dropped */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3092
3093 /*
3094 * Set the thread's QoS Workloop Servicer override
3095 * Owned by the Kevent subsystem
3096 *
3097 * May be called with spinlocks held, but not spinlocks
3098 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3099 *
3100 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3102 * Before the thread is deallocated, there must be 0 remaining overrides.
3103 */
static void
thread_servicer_override(thread_t thread,
    uint32_t qos_override,
    boolean_t is_new_override)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	if (is_new_override) {
		/* An 'add' may not stack on an existing servicer override */
		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
	} else {
		/* 'update' / 'drop' require an override to already be in effect */
		assert(thread->requested_policy.thrp_qos_wlsvc_override);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
	    qos_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Servicer overrides never schedule socket updates */
	assert(pend_token.tpt_update_sockets == 0);
	/* Apply any pending policy side effects now that the locks are dropped */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3130
/* Add a workloop servicer override; must be balanced by thread_drop_servicer_override */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* Callers must pass a concrete, in-range QoS value */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3139
/* Update the QoS value of an existing servicer override (between add and drop) */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* Callers must pass a concrete, in-range QoS value */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3148
/* Drop the servicer override by resetting it to THREAD_QOS_UNSPECIFIED */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3154
3155
3156 /* Get current requested qos / relpri, may be called from spinlock context */
3157 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3158 thread_get_requested_qos(thread_t thread, int *relpri)
3159 {
3160 int relprio_value = 0;
3161 thread_qos_t qos;
3162
3163 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3164 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3165 if (relpri) {
3166 *relpri = -relprio_value;
3167 }
3168 return qos;
3169 }
3170
3171 /*
3172 * This function will promote the thread priority
3173 * since exec could block other threads calling
3174 * proc_find on the proc. This boost must be removed
3175 * via call to thread_clear_exec_promotion.
3176 *
3177 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3178 */
void
thread_set_exec_promotion(thread_t thread)
{
	spl_t s = splsched();
	thread_lock(thread);

	/* Apply the exec-promotion flag while holding the thread lock at splsched */
	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3190
3191 /*
3192 * This function will clear the exec thread
3193 * promotion set on the thread by thread_set_exec_promotion.
3194 */
void
thread_clear_exec_promotion(thread_t thread)
{
	spl_t s = splsched();
	thread_lock(thread);

	/* Remove the exec-promotion flag applied by thread_set_exec_promotion */
	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3206