1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42
43 #include <mach/machine/sdt.h>
44
/* Typed allocation zone for per-thread QoS override tracking records. */
static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
    struct thread_qos_override, KT_DEFAULT);

#ifdef MACH_BSD
/* BSD-layer helpers declared for use from this file. */
extern int proc_selfpid(void);
extern char * proc_name_address(void *p);
extern void rethrottle_thread(void * uthread);
#endif /* MACH_BSD */

/* Strip the tier value out of a *_QOS_TIER_* constant's low byte. */
#define QOS_EXTRACT(q)        ((q) & 0xff)

/* Possible strategies for combining multiple QoS overrides on one thread. */
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

/* Boot-arg "qos_override_mode" selects one of the override modes above. */
TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
    QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);

static void
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);

/* Default kevent I/O tier override: THROTTLE_LEVEL_END means "no override". */
const int thread_default_iotier_override = THROTTLE_LEVEL_END;

/* Template requested-policy for newly initialized threads. */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
72
73 /*
74 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
75 * to threads that don't have a QoS class set.
76 */
77 const qos_policy_params_t thread_qos_policy_params = {
78 /*
79 * This table defines the starting base priority of the thread,
80 * which will be modified by the thread importance and the task max priority
81 * before being applied.
82 */
83 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
84 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
85 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
86 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
87 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
88 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
89 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
90
91 /*
92 * This table defines the highest IO priority that a thread marked with this
93 * QoS class can have.
94 */
95 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
96 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
97 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
98 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
99 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
100 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
101 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
102
103 /*
104 * This table defines the highest QoS level that
105 * a thread marked with this QoS class can have.
106 */
107
108 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
109 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
110 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
111 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
112 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
113 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
114 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
115
116 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
117 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
118 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
119 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
120 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
121 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
122 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
123 };
124
/*
 * Forward declarations for the static helpers in this file.
 *
 * Locking convention encoded in the names:
 *   *_locked     - caller holds the thread mutex
 *   *_spinlocked - caller holds the thread (scheduler) lock at splsched
 */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);

static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);

static void
proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static int
thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);

static int
proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);

static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);

static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154
155 boolean_t
thread_has_qos_policy(thread_t thread)156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
/*
 * Clear the thread's requested QoS tier and relative priority.
 *
 * Caller holds the thread mutex; deferred policy work is accumulated on
 * pend_token and must be run later via thread_policy_update_complete_unlocked.
 */
static void
thread_remove_qos_policy_locked(thread_t thread,
    task_pend_token_t pend_token)
{
	/* Only consumed by the DTrace probe below. */
	__unused int prev_qos = thread->requested_policy.thrp_qos;

	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
}
173
174 kern_return_t
thread_remove_qos_policy(thread_t thread)175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
thread_is_static_param(thread_t thread)196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255 }
256
/*
 * flag set by -qos-policy-allow boot-arg to allow
 * testing thread qos policy from userspace
 */
static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);

/*
 * MIG entry point for setting a thread policy flavor.
 *
 * Validates the request against static-param / QoS restrictions, then
 * temporarily strips any requested QoS tier before applying the flavor
 * via thread_policy_set_internal(), restoring the saved tier on failure.
 *
 * Called with nothing locked.
 */
kern_return_t
thread_policy_set(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	/* UNSPECIFIED doubles as "no saved tier to restore" below. */
	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		/* Threads with static params refuse all legacy policy changes. */
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		/* Direct QoS sets from this path are only allowed with the boot-arg. */
		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			/* Without the boot-arg, only the base realtime priority is accepted. */
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
333
/*
 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
 * are handled by common code that casts policy_info to the WITH_PRIORITY
 * struct; these asserts guarantee the shared fields sit at identical offsets
 * in both structs so that cast is safe.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338
/*
 * Apply one thread policy flavor to the thread.
 *
 * Unlike thread_policy_set(), this does not perform the boot-arg /
 * static-param gatekeeping, so in-kernel callers may apply any flavor.
 *
 * Called with nothing locked; takes the thread mutex for the duration and
 * drops to the scheduler spinlock where a flavor requires it.  Deferred
 * policy work is flushed via thread_policy_update_complete_unlocked()
 * before returning.
 *
 * Returns KERN_TERMINATED for inactive threads, KERN_INVALID_ARGUMENT for
 * malformed or unknown requests, otherwise the flavor-specific result.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* With no (or short) policy data, default to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/*
		 * The thread may be demoted with RT_DISALLOWED but has just
		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
		 * undemote the thread so the new sched mode takes effect.
		 */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/*
		 * Both flavors share the same leading fields (see the
		 * static_asserts above), so one struct pointer serves both.
		 */
		thread_time_constraint_with_priority_policy_t info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/*
		 * Reject a computation larger than the constraint or outside
		 * the [min_rt_quantum, max_rt_quantum] window.
		 */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * Raise the computation to at least half the constraint,
		 * capped at max_rt_quantum.
		 */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Explicit priority must fall within the realtime band. */
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;

		/*
		 * If the thread has a work interval driven policy, the priority
		 * offset has been set by the work interval.
		 */
		if (!thread->requested_policy.thrp_wi_driven) {
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
			} else {
				thread->realtime.priority_offset = 0;
			}
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance feeds directly into base priority computation. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only threads of the calling task may be backgrounded here. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-sets are internal; sets on other threads are external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Relative priority must be in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* A relative priority is meaningless without a QoS tier. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
631
632 /*
633 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
634 * Both result in FIXED mode scheduling.
635 */
636 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)637 convert_policy_to_sched_mode(integer_t policy)
638 {
639 switch (policy) {
640 case POLICY_TIMESHARE:
641 return TH_MODE_TIMESHARE;
642 case POLICY_RR:
643 case POLICY_FIFO:
644 return TH_MODE_FIXED;
645 default:
646 panic("unexpected sched policy: %d", policy);
647 return TH_MODE_NONE;
648 }
649 }
650
651 /*
652 * Called either with the thread mutex locked
653 * or from the pthread kext in a 'safe place'.
654 */
/*
 * Set the thread's user sched mode and importance from a requested
 * absolute priority, under the scheduler spinlock.
 *
 * Refuses (KERN_FAILURE) to change a realtime thread's mode; silently
 * succeeds on threads whose policy has been reset.  A mode change marks
 * pend_token for an SFI reevaluation by the caller.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,
    sched_mode_t mode,
    integer_t priority,
    task_pend_token_t pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	if (thread->policy_reset) {
		/* Policy has been reset; report success without applying anything. */
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	if (priority >= thread->max_priority) {
		/* Clamp overshooting requests to the thread's maximum. */
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -= MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -= MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	/* Re-base the derived importance on top of the task priority... */
	priority += thread->task_priority;

	/* ...then clamp the result into [MINPRI, max_priority]. */
	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	/* A mode change may affect the thread's SFI class; defer reevaluation. */
	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
719
720 void
thread_freeze_base_pri(thread_t thread)721 thread_freeze_base_pri(thread_t thread)
722 {
723 assert(thread == current_thread());
724
725 spl_t s = splsched();
726 thread_lock(thread);
727
728 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
729 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
730
731 thread_unlock(thread);
732 splx(s);
733 }
734
735 bool
thread_unfreeze_base_pri(thread_t thread)736 thread_unfreeze_base_pri(thread_t thread)
737 {
738 assert(thread == current_thread());
739 integer_t base_pri;
740 ast_t ast = 0;
741
742 spl_t s = splsched();
743 thread_lock(thread);
744
745 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
746 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
747
748 base_pri = thread->req_base_pri;
749 if (base_pri != thread->base_pri) {
750 /*
751 * This function returns "true" if the base pri change
752 * is the most likely cause for the preemption.
753 */
754 sched_set_thread_base_priority(thread, base_pri);
755 ast = ast_peek(AST_PREEMPT);
756 }
757
758 thread_unlock(thread);
759 splx(s);
760
761 return ast != 0;
762 }
763
764 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)765 thread_workq_pri_for_qos(thread_qos_t qos)
766 {
767 assert(qos < THREAD_QOS_LAST);
768 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
769 }
770
771 thread_qos_t
thread_workq_qos_for_pri(int priority)772 thread_workq_qos_for_pri(int priority)
773 {
774 thread_qos_t qos;
775 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
776 // indicate that workq should map >UI threads to workq's
777 // internal notation for above-UI work.
778 return THREAD_QOS_UNSPECIFIED;
779 }
780 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
781 // map a given priority up to the next nearest qos band.
782 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
783 return qos;
784 }
785 }
786 return THREAD_QOS_MAINTENANCE;
787 }
788
789 /*
790 * private interface for pthread workqueues
791 *
792 * Set scheduling policy & absolute priority for thread
793 * May be called with spinlocks held
794 * Thread mutex lock is not held
795 */
/*
 * Reset the thread's base QoS and clear any workq QoS override.
 * Private interface for pthread workqueues; may be called with
 * spinlocks held, thread mutex NOT held.
 */
void
thread_reset_workq_qos(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	/* Install the new base QoS and drop any workq override atomically
	 * with respect to the thread lock. */
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);

	/* QoS changes should never queue socket rethrottle work. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
819
820 /*
821 * private interface for pthread workqueues
822 *
823 * Set scheduling policy & absolute priority for thread
824 * May be called with spinlocks held
825 * Thread mutex lock is held
826 */
/*
 * Set (or, with THREAD_QOS_UNSPECIFIED, clear) the workq QoS override
 * attribute; the thread's base QoS is untouched.  Private interface for
 * pthread workqueues; may be called with spinlocks held, thread mutex held.
 */
void
thread_set_workq_override(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);

	/* Override changes should never queue socket rethrottle work. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
847
848 /*
849 * private interface for pthread workqueues
850 *
851 * Set scheduling policy & absolute priority for thread
852 * May be called with spinlocks held
853 * Thread mutex lock is not held
854 */
/*
 * Set scheduling policy, QoS and absolute priority for a workq thread.
 * Private interface for pthread workqueues; may be called with spinlocks
 * held, thread mutex NOT held.  Only valid for static-param threads.
 */
void
thread_set_workq_pri(thread_t thread,
    thread_qos_t qos,
    integer_t priority,
    integer_t policy)
{
	struct task_pend_token pend_token = {};
	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	assert(qos < THREAD_QOS_LAST);
	assert(thread->static_param);

	/* On release builds, silently bail for non-static or inactive threads. */
	if (!thread->static_param || !thread->active) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/* Install the base QoS and drop any stale workq override. */
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Concern: this doesn't hold the mutex... */

	__assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
	assert(kr == KERN_SUCCESS);

	/* Apply any SFI reevaluation deferred on the pend token. */
	if (pend_token.tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}
}
894
895 /*
896 * thread_set_mode_and_absolute_pri:
897 *
898 * Set scheduling policy & absolute priority for thread, for deprecated
899 * thread_set_policy and thread_policy interfaces.
900 *
901 * Called with nothing locked.
902 */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,
    integer_t policy,
    integer_t priority)
{
	kern_return_t kr = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	thread_mtx_lock(thread);

	if (!thread->active) {
		kr = KERN_TERMINATED;
		goto unlock;
	}

	/* Static-param threads reject legacy policy changes. */
	if (thread_is_static_param(thread)) {
		kr = KERN_POLICY_STATIC;
		goto unlock;
	}

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
	}

	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
	thread_mtx_unlock(thread);

	/* Flush deferred policy work (QoS removal and/or SFI reevaluation). */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return kr;
}
939
940 /*
941 * Set the thread's requested mode and recompute priority
942 * Called with thread mutex and thread locked
943 *
944 * TODO: Mitigate potential problems caused by moving thread to end of runq
945 * whenever its priority is recomputed
946 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
947 */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
	/* Threads whose policy has been reset keep their current mode. */
	if (thread->policy_reset) {
		return;
	}

	/* Pull the thread off its run queue before changing its priority. */
	boolean_t removed = thread_run_queue_remove(thread);

	sched_set_thread_mode_user(thread, mode);

	thread_recompute_priority(thread);

	/* If it was runnable, re-queue it (at the tail — see TODO above). */
	if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
	}
}
965
966 /* called at splsched with thread lock locked */
/*
 * Charge the thread's CPU time accumulated since the last call to the
 * owning task's per-QoS time statistics (both effective and requested QoS).
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 * Maybe doable with interrupts disabled from current thread.
	 * If the thread is on a different core, may not be easy to get right.
	 */

	/* Delta since the previous snapshot; then advance the snapshot. */
	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default;          break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance;      break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background;       break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility;          break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy;           break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;   break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default;          break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance;      break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background;       break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility;          break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy;           break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;   break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1019
1020 /*
1021 * called with no thread locks held
1022 * may hold task lock
1023 */
1024 void
thread_update_qos_cpu_time(thread_t thread)1025 thread_update_qos_cpu_time(thread_t thread)
1026 {
1027 thread_mtx_lock(thread);
1028
1029 spl_t s = splsched();
1030 thread_lock(thread);
1031
1032 thread_update_qos_cpu_time_locked(thread);
1033
1034 thread_unlock(thread);
1035 splx(s);
1036
1037 thread_mtx_unlock(thread);
1038 }
1039
1040 /*
1041 * Calculate base priority from thread attributes, and set it on the thread
1042 *
1043 * Called with thread_lock and thread mutex held.
1044 */
1045 void
thread_recompute_priority(thread_t thread)1046 thread_recompute_priority(
1047 thread_t thread)
1048 {
1049 integer_t priority;
1050 integer_t adj_priority;
1051 bool wi_priority = false;
1052
1053 if (thread->policy_reset) {
1054 return;
1055 }
1056
1057 if (thread->sched_mode == TH_MODE_REALTIME) {
1058 uint8_t i = thread->realtime.priority_offset;
1059 assert((i >= 0) && (i < NRTQS));
1060 priority = BASEPRI_RTQUEUES + i;
1061
1062 sched_set_thread_base_priority(thread, priority);
1063 if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1064 /* Make sure the thread has a valid deadline */
1065 uint64_t ctime = mach_absolute_time();
1066 thread->realtime.deadline = thread->realtime.constraint + ctime;
1067 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1068 (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1069 }
1070 return;
1071
1072 /*
1073 * A thread may have joined a RT work interval but then never
1074 * changed its sched mode or have been demoted. RT work
1075 * intervals will have RT priorities - ignore the priority if
1076 * the thread isn't RT.
1077 */
1078 } else if (thread->effective_policy.thep_wi_driven &&
1079 work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
1080 priority = work_interval_get_priority(thread);
1081 wi_priority = true;
1082 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1083 int qos = thread->effective_policy.thep_qos;
1084 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1085 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1086 int qos_scaled_relprio;
1087
1088 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1089 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1090
1091 priority = thread_qos_policy_params.qos_pri[qos];
1092 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1093
1094 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1095 /* Bump priority 46 to 47 when in a frontmost app */
1096 qos_scaled_relprio += 1;
1097 }
1098
1099 /* TODO: factor in renice priority here? */
1100
1101 priority += qos_scaled_relprio;
1102 } else {
1103 if (thread->importance > MAXPRI) {
1104 priority = MAXPRI;
1105 } else if (thread->importance < -MAXPRI) {
1106 priority = -MAXPRI;
1107 } else {
1108 priority = thread->importance;
1109 }
1110
1111 priority += thread->task_priority;
1112 }
1113
1114 /* Boost the priority of threads which are RT demoted. */
1115 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
1116 priority = MAX(priority, MAXPRI_USER);
1117 }
1118
1119 priority = MAX(priority, thread->user_promotion_basepri);
1120
1121 /*
1122 * Clamp priority back into the allowed range for this task.
1123 * The initial priority value could be out of this range due to:
1124 * Task clamped to BG or Utility (max-pri is 4, or 20)
1125 * Task is user task (max-pri is 63)
1126 * Task is kernel task (max-pri is 95)
1127 * Note that thread->importance is user-settable to any integer
1128 * via THREAD_PRECEDENCE_POLICY.
1129 */
1130 adj_priority = priority;
1131 adj_priority = MIN(adj_priority, thread->max_priority);
1132 adj_priority = MAX(adj_priority, MINPRI);
1133
1134 /* Allow workload driven priorities to exceed max_priority. */
1135 if (wi_priority) {
1136 adj_priority = MAX(adj_priority, priority);
1137 }
1138
1139 /* Allow priority to exceed max_priority for promotions. */
1140 if (thread->effective_policy.thep_promote_above_task) {
1141 adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1142 }
1143 priority = adj_priority;
1144 assert3u(priority, <=, MAXPRI);
1145
1146 if (thread->saved_mode == TH_MODE_REALTIME &&
1147 sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
1148 priority = DEPRESSPRI;
1149 }
1150
1151 if (thread->effective_policy.thep_terminated == TRUE) {
1152 /*
1153 * We temporarily want to override the expected priority to
1154 * ensure that the thread exits in a timely manner.
1155 * Note that this is allowed to exceed thread->max_priority
1156 * so that the thread is no longer clamped to background
1157 * during the final exit phase.
1158 */
1159 if (priority < thread->task_priority) {
1160 priority = thread->task_priority;
1161 }
1162 if (priority < BASEPRI_DEFAULT) {
1163 priority = BASEPRI_DEFAULT;
1164 }
1165 }
1166
1167 #if !defined(XNU_TARGET_OS_OSX)
1168 /* No one can have a base priority less than MAXPRI_THROTTLE */
1169 if (priority < MAXPRI_THROTTLE) {
1170 priority = MAXPRI_THROTTLE;
1171 }
1172 #endif /* !defined(XNU_TARGET_OS_OSX) */
1173
1174 sched_set_thread_base_priority(thread, priority);
1175 }
1176
1177 /* Called with the task lock held, but not the thread mutex or spinlock */
1178 void
thread_policy_update_tasklocked(thread_t thread,integer_t priority,integer_t max_priority,task_pend_token_t pend_token)1179 thread_policy_update_tasklocked(
1180 thread_t thread,
1181 integer_t priority,
1182 integer_t max_priority,
1183 task_pend_token_t pend_token)
1184 {
1185 thread_mtx_lock(thread);
1186
1187 if (!thread->active || thread->policy_reset) {
1188 thread_mtx_unlock(thread);
1189 return;
1190 }
1191
1192 spl_t s = splsched();
1193 thread_lock(thread);
1194
1195 __unused
1196 integer_t old_max_priority = thread->max_priority;
1197
1198 assert(priority >= INT16_MIN && priority <= INT16_MAX);
1199 thread->task_priority = (int16_t)priority;
1200
1201 assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1202 thread->max_priority = (int16_t)max_priority;
1203
1204 /*
1205 * When backgrounding a thread, realtime and fixed priority threads
1206 * should be demoted to timeshare background threads.
1207 *
1208 * TODO: Do this inside the thread policy update routine in order to avoid double
1209 * remove/reinsert for a runnable thread
1210 */
1211 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1212 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1213 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1214 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1215 }
1216
1217 thread_policy_update_spinlocked(thread, true, pend_token);
1218
1219 thread_unlock(thread);
1220 splx(s);
1221
1222 thread_mtx_unlock(thread);
1223 }
1224
1225 /*
1226 * Reset thread to default state in preparation for termination
1227 * Called with thread mutex locked
1228 *
1229 * Always called on current thread, so we don't need a run queue remove
1230 */
1231 void
thread_policy_reset(thread_t thread)1232 thread_policy_reset(
1233 thread_t thread)
1234 {
1235 spl_t s;
1236
1237 assert(thread == current_thread());
1238
1239 s = splsched();
1240 thread_lock(thread);
1241
1242 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1243 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1244 }
1245
1246 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1247 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1248 }
1249
1250 if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
1251 sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
1252 }
1253
1254 /* At this point, the various demotions should be inactive */
1255 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1256 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1257
1258 /* Reset thread back to task-default basepri and mode */
1259 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1260
1261 sched_set_thread_mode(thread, newmode);
1262
1263 thread->importance = 0;
1264
1265 /* Prevent further changes to thread base priority or mode */
1266 thread->policy_reset = 1;
1267
1268 sched_set_thread_base_priority(thread, thread->task_priority);
1269
1270 thread_unlock(thread);
1271 splx(s);
1272 }
1273
/*
 * Query a thread's scheduling policy for a given flavor.
 *
 * On entry *get_default selects the task/system default instead of the
 * thread's current setting; several flavors also set *get_default to TRUE
 * on the way out when the thread's state makes the query inapplicable
 * (e.g. asking for the timeshare setting of a realtime thread).
 *
 * Returns KERN_INVALID_ARGUMENT for a null thread, unknown flavor, or
 * too-small *count; KERN_TERMINATED for an inactive thread.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t          flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* If the thread is demoted, report its pre-demotion mode. */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				/* Realtime threads have no meaningful timeshare setting. */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t           info;

		/* The WITH_PRIORITY variant carries one extra field; size accordingly. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* Report RT parameters even if the thread is currently demoted. */
			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		/* Non-RT threads (or an explicit default query) get the timeshare defaults. */
		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Export the raw policy bitfields for debugging tools. */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * NOTE(review): this case reads requested_policy under the thread
		 * mutex only, without splsched/thread_lock like the other flavors,
		 * and does not consult *get_default — presumably a snapshot is
		 * acceptable here; confirm against the writers' locking.
		 */
		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		/* Translate the internal value back into the user-visible tier constant. */
		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; flip it back for the caller. */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1568
/*
 * Compute a newly-created thread's initial effective policy from its
 * requested policy and the owning task's state, bracketed by importance
 * trace events. Called from thread creation; takes the thread spinlock
 * path via thread_policy_update_internal_spinlocked.
 */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1587
/*
 * Re-run the thread policy calculation, wrapped in start/end trace events.
 * Thin tracing wrapper around thread_policy_update_internal_spinlocked;
 * called with the thread spinlock held.
 */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1603
1604
1605
1606 /*
1607 * One thread state update function TO RULE THEM ALL
1608 *
1609 * This function updates the thread effective policy fields
1610 * and pushes the results to the relevant subsystems.
1611 *
1612 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1613 */
1614 static void
thread_policy_update_internal_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1615 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1616 task_pend_token_t pend_token)
1617 {
1618 /*
1619 * Step 1:
1620 * Gather requested policy and effective task state
1621 */
1622
1623 const struct thread_requested_policy requested = thread->requested_policy;
1624 const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;
1625
1626 /*
1627 * Step 2:
1628 * Calculate new effective policies from requested policy, task and thread state
1629 * Rules:
1630 * Don't change requested, it won't take effect
1631 */
1632
1633 struct thread_effective_policy next = {};
1634
1635 next.thep_wi_driven = requested.thrp_wi_driven;
1636
1637 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1638
1639 uint32_t next_qos = requested.thrp_qos;
1640
1641 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1642 next_qos = MAX(requested.thrp_qos_override, next_qos);
1643 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1644 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1645 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1646 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1647 }
1648
1649 if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1650 requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1651 /*
1652 * This thread is turnstile-boosted higher than the adaptive clamp
1653 * by a synchronous waiter. Allow that to override the adaptive
1654 * clamp temporarily for this thread only.
1655 */
1656 next.thep_promote_above_task = true;
1657 next_qos = requested.thrp_qos_promote;
1658 }
1659
1660 next.thep_qos = next_qos;
1661
1662 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1663 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1664 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1665 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1666 } else {
1667 next.thep_qos = task_effective.tep_qos_clamp;
1668 }
1669 next.thep_wi_driven = 0;
1670 }
1671
1672 /*
1673 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1674 * This allows QoS promotions to work properly even after the process is unclamped.
1675 */
1676 next.thep_qos_promote = next.thep_qos;
1677
1678 /* The ceiling only applies to threads that are in the QoS world */
1679 /* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1680 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1681 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1682 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1683 }
1684
1685 /*
1686 * The QoS relative priority is only applicable when the original programmer's
1687 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1688 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1689 * since otherwise it would be lower than unclamped threads. Similarly, in the
1690 * presence of boosting, the programmer doesn't know what other actors
1691 * are boosting the thread.
1692 */
1693 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1694 (requested.thrp_qos == next.thep_qos) &&
1695 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1696 next.thep_qos_relprio = requested.thrp_qos_relprio;
1697 } else {
1698 next.thep_qos_relprio = 0;
1699 }
1700
1701 /* Calculate DARWIN_BG */
1702 bool wants_darwinbg = false;
1703 bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */
1704
1705 if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1706 wants_darwinbg = true;
1707 }
1708
1709 /*
1710 * If DARWIN_BG has been requested at either level, it's engaged.
1711 * darwinbg threads always create bg sockets,
1712 * but only some types of darwinbg change the sockets
1713 * after they're created
1714 */
1715 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1716 wants_all_sockets_bg = wants_darwinbg = true;
1717 }
1718
1719 if (requested.thrp_pidbind_bg) {
1720 wants_all_sockets_bg = wants_darwinbg = true;
1721 }
1722
1723 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1724 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1725 wants_darwinbg = true;
1726 }
1727
1728 /* Calculate side effects of DARWIN_BG */
1729
1730 if (wants_darwinbg) {
1731 next.thep_darwinbg = 1;
1732 next.thep_wi_driven = 0;
1733 }
1734
1735 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1736 next.thep_new_sockets_bg = 1;
1737 }
1738
1739 /* Don't use task_effective.tep_all_sockets_bg here */
1740 if (wants_all_sockets_bg) {
1741 next.thep_all_sockets_bg = 1;
1742 }
1743
1744 /* darwinbg implies background QOS (or lower) */
1745 if (next.thep_darwinbg &&
1746 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1747 next.thep_qos = THREAD_QOS_BACKGROUND;
1748 next.thep_qos_relprio = 0;
1749 }
1750
1751 /* Calculate IO policy */
1752
1753 int iopol = THROTTLE_LEVEL_TIER0;
1754
1755 /* Factor in the task's IO policy */
1756 if (next.thep_darwinbg) {
1757 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1758 }
1759
1760 if (!next.thep_promote_above_task) {
1761 iopol = MAX(iopol, task_effective.tep_io_tier);
1762 }
1763
1764 /* Look up the associated IO tier value for the QoS class */
1765 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1766
1767 iopol = MAX(iopol, requested.thrp_int_iotier);
1768 iopol = MAX(iopol, requested.thrp_ext_iotier);
1769
1770 /* Apply the kevent iotier override */
1771 iopol = MIN(iopol, requested.thrp_iotier_kevent_override);
1772
1773 next.thep_io_tier = iopol;
1774
1775 /*
1776 * If a QoS override is causing IO to go into a lower tier, we also set
1777 * the passive bit so that a thread doesn't end up stuck in its own throttle
1778 * window when the override goes away.
1779 */
1780
1781 int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1782 int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1783 bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1784
1785 /* Calculate Passive IO policy */
1786 if (requested.thrp_ext_iopassive ||
1787 requested.thrp_int_iopassive ||
1788 qos_io_override_active ||
1789 task_effective.tep_io_passive) {
1790 next.thep_io_passive = 1;
1791 }
1792
1793 /* Calculate timer QOS */
1794 uint32_t latency_qos = requested.thrp_latency_qos;
1795
1796 if (!next.thep_promote_above_task) {
1797 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1798 }
1799
1800 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1801
1802 next.thep_latency_qos = latency_qos;
1803
1804 /* Calculate throughput QOS */
1805 uint32_t through_qos = requested.thrp_through_qos;
1806
1807 if (!next.thep_promote_above_task) {
1808 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1809 }
1810
1811 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1812
1813 next.thep_through_qos = through_qos;
1814
1815 if (task_effective.tep_terminated || requested.thrp_terminated) {
1816 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1817 next.thep_terminated = 1;
1818 next.thep_darwinbg = 0;
1819 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1820 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1821 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1822 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1823 next.thep_wi_driven = 0;
1824 }
1825
1826 /*
1827 * Step 3:
1828 * Swap out old policy for new policy
1829 */
1830
1831 struct thread_effective_policy prev = thread->effective_policy;
1832
1833 thread_update_qos_cpu_time_locked(thread);
1834
1835 /* This is the point where the new values become visible to other threads */
1836 thread->effective_policy = next;
1837
1838 /*
1839 * Step 4:
1840 * Pend updates that can't be done while holding the thread lock
1841 */
1842
1843 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1844 pend_token->tpt_update_sockets = 1;
1845 }
1846
1847 /* TODO: Doesn't this only need to be done if the throttle went up? */
1848 if (prev.thep_io_tier != next.thep_io_tier) {
1849 pend_token->tpt_update_throttle = 1;
1850 }
1851
1852 /*
1853 * Check for the attributes that sfi_thread_classify() consults,
1854 * and trigger SFI re-evaluation.
1855 */
1856 if (prev.thep_qos != next.thep_qos ||
1857 prev.thep_darwinbg != next.thep_darwinbg) {
1858 pend_token->tpt_update_thread_sfi = 1;
1859 }
1860
1861 integer_t old_base_pri = thread->base_pri;
1862
1863 /*
1864 * Step 5:
1865 * Update other subsystems as necessary if something has changed
1866 */
1867
1868 /* Check for the attributes that thread_recompute_priority() consults */
1869 if (prev.thep_qos != next.thep_qos ||
1870 prev.thep_qos_relprio != next.thep_qos_relprio ||
1871 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1872 prev.thep_promote_above_task != next.thep_promote_above_task ||
1873 prev.thep_terminated != next.thep_terminated ||
1874 prev.thep_wi_driven != next.thep_wi_driven ||
1875 pend_token->tpt_force_recompute_pri == 1 ||
1876 recompute_priority) {
1877 thread_recompute_priority(thread);
1878 }
1879
1880 /*
1881 * Check if the thread is waiting on a turnstile and needs priority propagation.
1882 */
1883 if (pend_token->tpt_update_turnstile &&
1884 ((old_base_pri == thread->base_pri) ||
1885 !thread_get_waiting_turnstile(thread))) {
1886 /*
1887 * Reset update turnstile pend token since either
1888 * the thread priority did not change or thread is
1889 * not blocked on a turnstile.
1890 */
1891 pend_token->tpt_update_turnstile = 0;
1892 }
1893 }
1894
1895
1896 /*
1897 * Initiate a thread policy state transition on a thread with its TID
1898 * Useful if you cannot guarantee the thread won't get terminated
1899 * Precondition: No locks are held
1900 * Will take task lock - using the non-tid variant is faster
1901 * if you already have a thread ref.
1902 */
1903 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1904 proc_set_thread_policy_with_tid(task_t task,
1905 uint64_t tid,
1906 int category,
1907 int flavor,
1908 int value)
1909 {
1910 /* takes task lock, returns ref'ed thread or NULL */
1911 thread_t thread = task_findtid(task, tid);
1912
1913 if (thread == THREAD_NULL) {
1914 return;
1915 }
1916
1917 proc_set_thread_policy(thread, category, flavor, value);
1918
1919 thread_deallocate(thread);
1920 }
1921
1922 /*
1923 * Initiate a thread policy transition on a thread
1924 * This path supports networking transitions (i.e. darwinbg transitions)
1925 * Precondition: No locks are held
1926 */
1927 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1928 proc_set_thread_policy(thread_t thread,
1929 int category,
1930 int flavor,
1931 int value)
1932 {
1933 proc_set_thread_policy_ext(thread, category, flavor, value, 0);
1934 }
1935
1936 void
proc_set_thread_policy_ext(thread_t thread,int category,int flavor,int value,int value2)1937 proc_set_thread_policy_ext(thread_t thread,
1938 int category,
1939 int flavor,
1940 int value,
1941 int value2)
1942 {
1943 struct task_pend_token pend_token = {};
1944
1945 thread_mtx_lock(thread);
1946
1947 proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1948
1949 thread_mtx_unlock(thread);
1950
1951 thread_policy_update_complete_unlocked(thread, &pend_token);
1952 }
1953
1954 /*
1955 * Do the things that can't be done while holding a thread mutex.
1956 * These are set up to call back into thread policy to get the latest value,
1957 * so they don't have to be synchronized with the update.
1958 * The only required semantic is 'call this sometime after updating effective policy'
1959 *
1960 * Precondition: Thread mutex is not held
1961 *
1962 * This may be called with the task lock held, but in that case it won't be
1963 * called with tpt_update_sockets set.
1964 */
1965 void
thread_policy_update_complete_unlocked(thread_t thread,task_pend_token_t pend_token)1966 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1967 {
1968 #ifdef MACH_BSD
1969 if (pend_token->tpt_update_sockets) {
1970 proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
1971 }
1972 #endif /* MACH_BSD */
1973
1974 if (pend_token->tpt_update_throttle) {
1975 rethrottle_thread(get_bsdthread_info(thread));
1976 }
1977
1978 if (pend_token->tpt_update_thread_sfi) {
1979 sfi_reevaluate(thread);
1980 }
1981
1982 if (pend_token->tpt_update_turnstile) {
1983 turnstile_update_thread_priority_chain(thread);
1984 }
1985 }
1986
1987 /*
1988 * Set and update thread policy
1989 * Thread mutex might be held
1990 */
static void
proc_set_thread_policy_locked(thread_t thread,
    int category,
    int flavor,
    int value,
    int value2,
    task_pend_token_t pend_token)
{
	/* Raise to scheduler spl and take the thread spinlock around the update. */
	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_unlock(thread);
	splx(s);
}
2007
2008 /*
2009 * Set and update thread policy
2010 * Thread spinlock is held
2011 */
static void
proc_set_thread_policy_spinlocked(thread_t thread,
    int category,
    int flavor,
    int value,
    int value2,
    task_pend_token_t pend_token)
{
	/* Trace the requested-policy words before the update. */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	/* Recompute effective policy from the new requested values. */
	thread_policy_update_spinlocked(thread, false, pend_token);

	/* Trace the requested-policy words and pending work after the update. */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
2034
2035 /*
2036 * Set the requested state for a specific flavor to a specific value.
2037 */
static void
thread_set_requested_policy_spinlocked(thread_t thread,
    int category,
    int flavor,
    int value,
    int value2,
    task_pend_token_t pend_token)
{
	int tier, passive;

	/* Work on a local copy; commit it with a single store at the end. */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		/* BG affects priority, so the turnstile chain must be re-evaluated. */
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_IOPOL:
		/* Translate an IOPOL_* value into a throttle tier plus passive bit. */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		/* value is the QoS class, value2 is the relative priority offset. */
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	case TASK_POLICY_WI_DRIVEN:
		/* value selects set/clear; value2 carries the desired sched mode. */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(thread == current_thread());

		const bool set_policy = value;
		const sched_mode_t mode = value2;

		requested.thrp_wi_driven = set_policy ? 1 : 0;

		/*
		 * No sched mode change for REALTIME (threads must explicitly
		 * opt-in), however the priority_offset needs to be updated.
		 */
		if (mode == TH_MODE_REALTIME) {
			const int pri = work_interval_get_priority(thread);
			assert3u(pri, >=, BASEPRI_RTQUEUES);
			thread->realtime.priority_offset = set_policy ?
			    (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
		} else {
			sched_set_thread_mode_user(thread, mode);
			if (set_policy) {
				thread->static_param = true;
			}
		}
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Single committing store of the updated requested policy. */
	thread->requested_policy = requested;
}
2187
2188 /*
2189 * Gets what you set. Effective values may be different.
2190 * Precondition: No locks are held
2191 */
2192 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2193 proc_get_thread_policy(thread_t thread,
2194 int category,
2195 int flavor)
2196 {
2197 int value = 0;
2198 thread_mtx_lock(thread);
2199 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2200 thread_mtx_unlock(thread);
2201 return value;
2202 }
2203
2204 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2205 proc_get_thread_policy_locked(thread_t thread,
2206 int category,
2207 int flavor,
2208 int* value2)
2209 {
2210 int value = 0;
2211
2212 spl_t s = splsched();
2213 thread_lock(thread);
2214
2215 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2216
2217 thread_unlock(thread);
2218 splx(s);
2219
2220 return value;
2221 }
2222
2223 /*
2224 * Gets what you set. Effective values may be different.
2225 */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
    int category,
    int flavor,
    int* value2)
{
	/*
	 * Reads back the requested (not effective) value for a flavor.
	 * Caller holds the thread spinlock. value2 is only written for
	 * TASK_POLICY_QOS_AND_RELPRIO (the relative priority).
	 */
	int value = 0;

	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Translate tier + passive bit back to an IOPOL_* value. */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;
	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_iotier_kevent_override;
		break;

	case TASK_POLICY_WI_DRIVEN:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_wi_driven;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2326
2327 /*
2328 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2329 *
2330 * NOTE: This accessor does not take the task or thread lock.
2331 * Notifications of state updates need to be externally synchronized with state queries.
2332 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2333 * within the context of a timer interrupt.
2334 *
2335 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2336 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2337 * I don't think that cost is worth not having the right answer.
2338 */
int
proc_get_effective_thread_policy(thread_t thread,
    int flavor)
{
	/*
	 * Lock-free read of effective policy; must stay interrupt-safe
	 * (see block comment above). Reads unsynchronized fields, so
	 * callers may observe slightly stale values.
	 */
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		/* An active override can only make the tier more aggressive (lower). */
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2431
2432
2433 /*
2434 * (integer_t) casts limit the number of bits we can fit here
2435 * this interface is deprecated and replaced by the _EXT struct ?
2436 */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	/*
	 * Pack requested and effective policy into the legacy POLICY_REQ_* /
	 * POLICY_EFF_* bitfield layout. Zero-valued fields contribute no bits,
	 * so unset fields read back as zero in the packed word.
	 */
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
	bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
	bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);

	bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);

	bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	/* Narrowing cast: only the low bits fit in the legacy integer_t field. */
	info->requested = (integer_t) bits;
	bits = 0;

	struct thread_effective_policy effective = thread->effective_policy;

	bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);

	bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
	bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
	bits = 0;

	/* No pending-policy tracking at thread granularity. */
	info->pending = 0;
}
2480
2481 /*
2482 * Sneakily trace either the task and thread requested
2483 * or just the thread requested, depending on if we have enough room.
2484 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2485 *
2486 * LP32 LP64
2487 * threquested_0(thread) thread[0] task[0]
2488 * threquested_1(thread) thread[1] thread[0]
2489 *
2490 */
2491
uintptr_t
threquested_0(thread_t thread)
{
	/* The struct must be exactly one 64-bit word for this raw access. */
	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

	/* Raw word read of the thread's requested policy, for tracing only. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

	return raw[0];
}
2501
uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
	/* LP64: slot 1 traces the owning task's requested policy word. */
	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
#else
	/* LP32: slot 1 is the upper half of the thread's 64-bit policy word. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}
2512
uintptr_t
theffective_0(thread_t thread)
{
	/* The struct must be exactly one 64-bit word for this raw access. */
	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

	/* Raw word read of the thread's effective policy, for tracing only. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[0];
}
2521
uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
	/* LP64: slot 1 traces the owning task's effective policy word. */
	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
#else
	/* LP32: slot 1 is the upper half of the thread's 64-bit policy word. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
2532
2533
2534 /*
2535 * Set an override on the thread which is consulted with a
2536 * higher priority than the task/thread policy. This should
2537 * only be set for temporary grants until the thread
2538 * returns to the userspace boundary
2539 *
2540 * We use atomic operations to swap in the override, with
2541 * the assumption that the thread itself can
2542 * read the override and clear it on return to userspace.
2543 *
2544 * No locking is performed, since it is acceptable to see
2545 * a stale override for one loop through throttle_lowpri_io().
2546 * However a thread reference must be held on the thread.
2547 */
2548
void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE) {
			/* Lower tier number is more aggressive; keep the strongest. */
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* CAS retries if another CPU changed the override concurrently. */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2575
2576 /*
2577 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2578 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2579 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2580 * priority thread. In these cases, we attempt to propagate the priority token, as long
2581 * as the subsystem informs us of the relationships between the threads. The userspace
2582 * synchronization subsystem should maintain the information of owner->resource and
2583 * resource->waiters itself.
2584 */
2585
2586 /*
2587 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2588 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2589 * to be handled specially in the future, but for now it's fine to slam
2590 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2591 */
2592 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2593 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2594 {
2595 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2596 /* Map all input resource/type to a single one */
2597 *resource = USER_ADDR_NULL;
2598 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2599 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2600 /* no transform */
2601 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2602 /* Map all mutex overrides to a single one, to avoid memory overhead */
2603 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2604 *resource = USER_ADDR_NULL;
2605 }
2606 }
2607 }
2608
2609 /* This helper routine finds an existing override if known. Locking should be done by caller */
2610 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2611 find_qos_override(thread_t thread,
2612 user_addr_t resource,
2613 int resource_type)
2614 {
2615 struct thread_qos_override *override;
2616
2617 override = thread->overrides;
2618 while (override) {
2619 if (override->override_resource == resource &&
2620 override->override_resource_type == resource_type) {
2621 return override;
2622 }
2623
2624 override = override->override_next;
2625 }
2626
2627 return NULL;
2628 }
2629
static void
find_and_decrement_qos_override(thread_t thread,
    user_addr_t resource,
    int resource_type,
    boolean_t reset,
    struct thread_qos_override **free_override_list)
{
	/*
	 * Decrement (or zero, when reset is TRUE) the contention count of every
	 * override matching resource/resource_type; either may be a WILDCARD.
	 * Overrides whose count reaches zero are unlinked and pushed onto
	 * *free_override_list for the caller to zfree outside the locks.
	 * Caller must hold the thread mutex.
	 */
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink the dead override from the list. */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* An exact (non-wildcard) resource matches at most one entry. */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2676
2677 /* This helper recalculates the current requested override using the policy selected at boot */
2678 static int
calculate_requested_qos_override(thread_t thread)2679 calculate_requested_qos_override(thread_t thread)
2680 {
2681 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2682 return THREAD_QOS_UNSPECIFIED;
2683 }
2684
2685 /* iterate over all overrides and calculate MAX */
2686 struct thread_qos_override *override;
2687 int qos_override = THREAD_QOS_UNSPECIFIED;
2688
2689 override = thread->overrides;
2690 while (override) {
2691 qos_override = MAX(qos_override, override->override_qos);
2692 override = override->override_next;
2693 }
2694
2695 return qos_override;
2696 }
2697
2698 /*
2699 * Returns:
2700 * - 0 on success
2701 * - EINVAL if some invalid input was passed
2702 */
static int
proc_thread_qos_add_override_internal(thread_t thread,
    int override_qos,
    boolean_t first_override_for_resource,
    user_addr_t resource,
    int resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* We won the allocation race: link the new override in. */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		/* An override only ratchets upward; never lower an existing grant. */
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Lost the allocation race: free the unused zone element. */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2795
int
proc_thread_qos_add_override(task_t task,
    thread_t thread,
    uint64_t tid,
    int override_qos,
    boolean_t first_override_for_resource,
    user_addr_t resource,
    int resource_type)
{
	/*
	 * Grant a QoS override on a thread, identified either directly
	 * (thread != THREAD_NULL) or by (task, tid) lookup.
	 * Returns 0 on success, ESRCH if the tid lookup fails.
	 */
	boolean_t has_thread_reference = FALSE;
	int rc = 0;

	if (thread == THREAD_NULL) {
		thread = task_findtid(task, tid);
		/* returns referenced thread */

		if (thread == THREAD_NULL) {
			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
			    tid, 0, 0xdead, 0, 0);
			return ESRCH;
		}
		has_thread_reference = TRUE;
	} else {
		assert(get_threadtask(thread) == task);
	}
	rc = proc_thread_qos_add_override_internal(thread, override_qos,
	    first_override_for_resource, resource, resource_type);
	/* Drop the reference taken by task_findtid(), if any. */
	if (has_thread_reference) {
		thread_deallocate(thread);
	}

	return rc;
}
2829
static void
proc_thread_qos_remove_override_internal(thread_t thread,
    user_addr_t resource,
    int resource_type,
    boolean_t reset)
{
	/*
	 * Release (or, with reset, clear) override grants matching
	 * resource/resource_type, then recompute and apply the thread's
	 * single requested QoS override. Freed override objects are
	 * collected and zfree'd after all locks are dropped.
	 */
	struct task_pend_token pend_token = {};

	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the unlinked override objects now that no locks are held. */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2893
2894 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2895 proc_thread_qos_remove_override(task_t task,
2896 thread_t thread,
2897 uint64_t tid,
2898 user_addr_t resource,
2899 int resource_type)
2900 {
2901 boolean_t has_thread_reference = FALSE;
2902
2903 if (thread == THREAD_NULL) {
2904 thread = task_findtid(task, tid);
2905 /* returns referenced thread */
2906
2907 if (thread == THREAD_NULL) {
2908 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2909 tid, 0, 0xdead, 0, 0);
2910 return ESRCH;
2911 }
2912 has_thread_reference = TRUE;
2913 } else {
2914 assert(task == get_threadtask(thread));
2915 }
2916
2917 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2918
2919 if (has_thread_reference) {
2920 thread_deallocate(thread);
2921 }
2922
2923 return 0;
2924 }
2925
2926 /* Deallocate before thread termination */
2927 void
proc_thread_qos_deallocate(thread_t thread)2928 proc_thread_qos_deallocate(thread_t thread)
2929 {
2930 /* This thread must have no more IPC overrides. */
2931 assert(thread->kevent_overrides == 0);
2932 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2933 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2934
2935 /*
2936 * Clear out any lingering override objects.
2937 */
2938 struct thread_qos_override *override;
2939
2940 thread_mtx_lock(thread);
2941 override = thread->overrides;
2942 thread->overrides = NULL;
2943 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2944 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2945 thread_mtx_unlock(thread);
2946
2947 while (override) {
2948 struct thread_qos_override *override_next = override->override_next;
2949
2950 zfree(thread_qos_override_zone, override);
2951 override = override_next;
2952 }
2953 }
2954
2955 /*
2956 * Set up the primordial thread's QoS
2957 */
2958 void
task_set_main_thread_qos(task_t task,thread_t thread)2959 task_set_main_thread_qos(task_t task, thread_t thread)
2960 {
2961 struct task_pend_token pend_token = {};
2962
2963 assert(get_threadtask(thread) == task);
2964
2965 thread_mtx_lock(thread);
2966
2967 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2968 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2969 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2970 thread->requested_policy.thrp_qos, 0);
2971
2972 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2973
2974 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2975 primordial_qos, 0, &pend_token);
2976
2977 thread_mtx_unlock(thread);
2978
2979 thread_policy_update_complete_unlocked(thread, &pend_token);
2980
2981 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2982 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2983 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2984 primordial_qos, 0);
2985 }
2986
2987 /*
2988 * KPI for pthread kext
2989 *
2990 * Return a good guess at what the initial manager QoS will be
2991 * Dispatch can override this in userspace if it so chooses
2992 */
2993 thread_qos_t
task_get_default_manager_qos(task_t task)2994 task_get_default_manager_qos(task_t task)
2995 {
2996 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2997
2998 if (primordial_qos == THREAD_QOS_LEGACY) {
2999 primordial_qos = THREAD_QOS_USER_INITIATED;
3000 }
3001
3002 return primordial_qos;
3003 }
3004
3005 /*
3006 * Check if the kernel promotion on thread has changed
3007 * and apply it.
3008 *
3009 * thread locked on entry and exit
3010 */
3011 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)3012 thread_recompute_kernel_promotion_locked(thread_t thread)
3013 {
3014 boolean_t needs_update = FALSE;
3015 uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3016
3017 /*
3018 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3019 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
3020 * and propagates the priority through the chain with the same cap, because as of now it does
3021 * not differenciate on the kernel primitive.
3022 *
3023 * If this assumption will change with the adoption of a kernel primitive that does not
3024 * cap the when adding/propagating,
3025 * then here is the place to put the generic cap for all kernel primitives
3026 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
3027 */
3028 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3029
3030 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3031 KDBG(MACHDBG_CODE(
3032 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3033 thread_tid(thread),
3034 kern_promotion_schedpri,
3035 thread->kern_promotion_schedpri);
3036
3037 needs_update = TRUE;
3038 thread->kern_promotion_schedpri = kern_promotion_schedpri;
3039 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3040 }
3041
3042 return needs_update;
3043 }
3044
3045 /*
3046 * Check if the user promotion on thread has changed
3047 * and apply it.
3048 *
3049 * thread locked on entry, might drop the thread lock
3050 * and reacquire it.
3051 */
3052 boolean_t
thread_recompute_user_promotion_locked(thread_t thread)3053 thread_recompute_user_promotion_locked(thread_t thread)
3054 {
3055 boolean_t needs_update = FALSE;
3056 struct task_pend_token pend_token = {};
3057 uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
3058 int old_base_pri = thread->base_pri;
3059 thread_qos_t qos_promotion;
3060
3061 /* Check if user promotion has changed */
3062 if (thread->user_promotion_basepri == user_promotion_basepri) {
3063 return needs_update;
3064 } else {
3065 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3066 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
3067 thread_tid(thread),
3068 user_promotion_basepri,
3069 thread->user_promotion_basepri,
3070 0, 0);
3071 KDBG(MACHDBG_CODE(
3072 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
3073 thread_tid(thread),
3074 user_promotion_basepri,
3075 thread->user_promotion_basepri);
3076 }
3077
3078 /* Update the user promotion base pri */
3079 thread->user_promotion_basepri = user_promotion_basepri;
3080 pend_token.tpt_force_recompute_pri = 1;
3081
3082 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
3083 qos_promotion = THREAD_QOS_UNSPECIFIED;
3084 } else {
3085 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
3086 }
3087
3088 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3089 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
3090
3091 if (thread_get_waiting_turnstile(thread) &&
3092 thread->base_pri != old_base_pri) {
3093 needs_update = TRUE;
3094 }
3095
3096 thread_unlock(thread);
3097
3098 thread_policy_update_complete_unlocked(thread, &pend_token);
3099
3100 thread_lock(thread);
3101
3102 return needs_update;
3103 }
3104
3105 /*
3106 * Convert the thread user promotion base pri to qos for threads in qos world.
3107 * For priority above UI qos, the qos would be set to UI.
3108 */
3109 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3110 thread_user_promotion_qos_for_pri(int priority)
3111 {
3112 thread_qos_t qos;
3113 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3114 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3115 return qos;
3116 }
3117 }
3118 return THREAD_QOS_MAINTENANCE;
3119 }
3120
3121 /*
3122 * Set the thread's QoS Kevent override
3123 * Owned by the Kevent subsystem
3124 *
3125 * May be called with spinlocks held, but not spinlocks
3126 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3127 *
3128 * One 'add' must be balanced by one 'drop'.
3129 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3130 * Before the thread is deallocated, there must be 0 remaining overrides.
3131 */
3132 static void
thread_kevent_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3133 thread_kevent_override(thread_t thread,
3134 uint32_t qos_override,
3135 boolean_t is_new_override)
3136 {
3137 struct task_pend_token pend_token = {};
3138 boolean_t needs_update;
3139
3140 spl_t s = splsched();
3141 thread_lock(thread);
3142
3143 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3144
3145 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3146 assert(qos_override < THREAD_QOS_LAST);
3147
3148 if (is_new_override) {
3149 if (thread->kevent_overrides++ == 0) {
3150 /* This add is the first override for this thread */
3151 assert(old_override == THREAD_QOS_UNSPECIFIED);
3152 } else {
3153 /* There are already other overrides in effect for this thread */
3154 assert(old_override > THREAD_QOS_UNSPECIFIED);
3155 }
3156 } else {
3157 /* There must be at least one override (the previous add call) in effect */
3158 assert(thread->kevent_overrides > 0);
3159 assert(old_override > THREAD_QOS_UNSPECIFIED);
3160 }
3161
3162 /*
3163 * We can't allow lowering if there are several IPC overrides because
3164 * the caller can't possibly know the whole truth
3165 */
3166 if (thread->kevent_overrides == 1) {
3167 needs_update = qos_override != old_override;
3168 } else {
3169 needs_update = qos_override > old_override;
3170 }
3171
3172 if (needs_update) {
3173 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3174 TASK_POLICY_QOS_KEVENT_OVERRIDE,
3175 qos_override, 0, &pend_token);
3176 assert(pend_token.tpt_update_sockets == 0);
3177 }
3178
3179 thread_unlock(thread);
3180 splx(s);
3181
3182 thread_policy_update_complete_unlocked(thread, &pend_token);
3183 }
3184
3185 void
thread_add_kevent_override(thread_t thread,uint32_t qos_override)3186 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3187 {
3188 thread_kevent_override(thread, qos_override, TRUE);
3189 }
3190
3191 void
thread_update_kevent_override(thread_t thread,uint32_t qos_override)3192 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3193 {
3194 thread_kevent_override(thread, qos_override, FALSE);
3195 }
3196
3197 void
thread_drop_kevent_override(thread_t thread)3198 thread_drop_kevent_override(thread_t thread)
3199 {
3200 struct task_pend_token pend_token = {};
3201
3202 spl_t s = splsched();
3203 thread_lock(thread);
3204
3205 assert(thread->kevent_overrides > 0);
3206
3207 if (--thread->kevent_overrides == 0) {
3208 /*
3209 * There are no more overrides for this thread, so we should
3210 * clear out the saturated override value
3211 */
3212
3213 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3214 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3215 0, &pend_token);
3216 }
3217
3218 thread_unlock(thread);
3219 splx(s);
3220
3221 thread_policy_update_complete_unlocked(thread, &pend_token);
3222 }
3223
3224 /*
3225 * Set the thread's QoS Workloop Servicer override
3226 * Owned by the Kevent subsystem
3227 *
3228 * May be called with spinlocks held, but not spinlocks
3229 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3230 *
3231 * One 'add' must be balanced by one 'drop'.
3232 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3233 * Before the thread is deallocated, there must be 0 remaining overrides.
3234 */
3235 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3236 thread_servicer_override(thread_t thread,
3237 uint32_t qos_override,
3238 boolean_t is_new_override)
3239 {
3240 struct task_pend_token pend_token = {};
3241
3242 spl_t s = splsched();
3243 thread_lock(thread);
3244
3245 if (is_new_override) {
3246 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3247 } else {
3248 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3249 }
3250
3251 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3252 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3253 qos_override, 0, &pend_token);
3254
3255 thread_unlock(thread);
3256 splx(s);
3257
3258 assert(pend_token.tpt_update_sockets == 0);
3259 thread_policy_update_complete_unlocked(thread, &pend_token);
3260 }
3261
3262 void
thread_add_servicer_override(thread_t thread,uint32_t qos_override)3263 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3264 {
3265 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3266 assert(qos_override < THREAD_QOS_LAST);
3267
3268 thread_servicer_override(thread, qos_override, TRUE);
3269 }
3270
3271 void
thread_update_servicer_override(thread_t thread,uint32_t qos_override)3272 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3273 {
3274 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3275 assert(qos_override < THREAD_QOS_LAST);
3276
3277 thread_servicer_override(thread, qos_override, FALSE);
3278 }
3279
3280 void
thread_drop_servicer_override(thread_t thread)3281 thread_drop_servicer_override(thread_t thread)
3282 {
3283 thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3284 }
3285
/*
 * Update (or clear) the kevent servicer IO tier override on a thread.
 * Owned by the Kevent subsystem.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/*
	 * Check if the update is needed.
	 * NOTE(review): this read happens before the thread lock is taken
	 * below, so it can race with a concurrent change — presumably the
	 * caller serializes updates to this override; confirm.
	 */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	assert(pend_token.tpt_update_sockets == 0);
	/* Run any pended policy work now that the spinlock is dropped */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3313
3314 /* Get current requested qos / relpri, may be called from spinlock context */
3315 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3316 thread_get_requested_qos(thread_t thread, int *relpri)
3317 {
3318 int relprio_value = 0;
3319 thread_qos_t qos;
3320
3321 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3322 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3323 if (relpri) {
3324 *relpri = -relprio_value;
3325 }
3326 return qos;
3327 }
3328
3329 /*
3330 * This function will promote the thread priority
3331 * since exec could block other threads calling
3332 * proc_find on the proc. This boost must be removed
3333 * via call to thread_clear_exec_promotion.
3334 *
3335 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3336 */
3337 void
thread_set_exec_promotion(thread_t thread)3338 thread_set_exec_promotion(thread_t thread)
3339 {
3340 spl_t s = splsched();
3341 thread_lock(thread);
3342
3343 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3344
3345 thread_unlock(thread);
3346 splx(s);
3347 }
3348
3349 /*
3350 * This function will clear the exec thread
3351 * promotion set on the thread by thread_set_exec_promotion.
3352 */
3353 void
thread_clear_exec_promotion(thread_t thread)3354 thread_clear_exec_promotion(thread_t thread)
3355 {
3356 spl_t s = splsched();
3357 thread_lock(thread);
3358
3359 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3360
3361 thread_unlock(thread);
3362 splx(s);
3363 }
3364
3365 #if CONFIG_SCHED_RT_ALLOW
3366
3367 /*
3368 * flag set by -rt-allow-policy-enable boot-arg to restrict use of
3369 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
3370 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
3371 */
3372 static TUNABLE(
3373 bool,
3374 rt_allow_policy_enabled,
3375 "-rt-allow_policy-enable",
3376 false
3377 );
3378
3379 /*
3380 * When the RT allow policy is enabled and a thread allowed to become RT,
3381 * sometimes (if the processes RT allow policy is restricted) the thread will
3382 * have a CPU limit enforced. The following two tunables determine the
3383 * parameters for that CPU limit.
3384 */
3385
3386 /* % of the interval allowed to run. */
3387 TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
3388 "rt_allow_limit_percent", 70);
3389
3390 /* The length of interval in nanoseconds. */
3391 TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
3392 "rt_allow_limit_interval", 10);
3393
3394 static bool
thread_has_rt(thread_t thread)3395 thread_has_rt(thread_t thread)
3396 {
3397 return
3398 thread->sched_mode == TH_MODE_REALTIME ||
3399 thread->saved_mode == TH_MODE_REALTIME;
3400 }
3401
3402 /*
3403 * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3404 * up by the target thread via the ledger AST.
3405 */
3406 static void
thread_rt_set_cpulimit(thread_t thread)3407 thread_rt_set_cpulimit(thread_t thread)
3408 {
3409 /* Force reasonable values for the cpu limit. */
3410 const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3411 const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3412
3413 thread->t_ledger_req_percentage = percent;
3414 thread->t_ledger_req_interval_ms = interval_ms;
3415 thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3416
3417 thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3418 }
3419
/* Similar to the above but removes any CPU limit. */
static void
thread_rt_clear_cpulimit(thread_t thread)
{
	thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;

	/* Zeroed request plus DISABLE action tells the ledger AST to drop the limit. */
	thread->t_ledger_req_percentage = 0;
	thread->t_ledger_req_interval_ms = 0;
	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
}
3430
3431 /*
3432 * Evaluate RT policy for a thread, demoting and undemoting as needed.
3433 */
3434 void
thread_rt_evaluate(thread_t thread)3435 thread_rt_evaluate(thread_t thread)
3436 {
3437 task_t task = get_threadtask(thread);
3438 bool platform_binary = false;
3439
3440 /* If the RT allow policy is not enabled - nothing to do. */
3441 if (!rt_allow_policy_enabled) {
3442 return;
3443 }
3444
3445 /* User threads only. */
3446 if (task == kernel_task) {
3447 return;
3448 }
3449
3450 /* Check for platform binary. */
3451 platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
3452
3453 spl_t s = splsched();
3454 thread_lock(thread);
3455
3456 const thread_work_interval_flags_t wi_flags =
3457 os_atomic_load(&thread->th_work_interval_flags, relaxed);
3458
3459 /*
3460 * RT threads which are not joined to a work interval which allows RT
3461 * threads are demoted. Once those conditions no longer hold, the thread
3462 * undemoted.
3463 */
3464 if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
3465 if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3466 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
3467 thread_tid(thread));
3468 sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
3469 }
3470 } else {
3471 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3472 sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
3473 }
3474 }
3475
3476 /*
3477 * RT threads get a CPU limit unless they're part of a platform binary
3478 * task. If the thread is no longer RT, any existing CPU limit should be
3479 * removed.
3480 */
3481 bool set_ast = false;
3482 if (!platform_binary &&
3483 thread_has_rt(thread) &&
3484 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
3485 thread_rt_set_cpulimit(thread);
3486 set_ast = true;
3487 }
3488
3489 if (!platform_binary &&
3490 !thread_has_rt(thread) &&
3491 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
3492 thread_rt_clear_cpulimit(thread);
3493 set_ast = true;
3494 }
3495
3496 thread_unlock(thread);
3497 splx(s);
3498
3499 if (set_ast) {
3500 /* Ensure the target thread picks up any CPU limit change. */
3501 act_set_astledger(thread);
3502 }
3503 }
3504
3505 #else
3506
/* RT allow policy compiled out (CONFIG_SCHED_RT_ALLOW unset): nothing to evaluate. */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
3511
3512 #endif /* CONFIG_SCHED_RT_ALLOW */
3513