1 /*
2 * Copyright (c) 2000-2020 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <kern/policy_internal.h>
30 #include <mach/task_policy.h>
31 #include <mach/task.h>
32 #include <mach/mach_types.h>
33 #include <mach/task_server.h>
34 #include <kern/host.h> /* host_priv_self() */
35 #include <mach/host_priv.h> /* host_get_special_port() */
36 #include <mach/host_special_ports.h> /* RESOURCE_NOTIFY_PORT */
37 #include <kern/sched.h>
38 #include <kern/task.h>
39 #include <mach/thread_policy.h>
40 #include <sys/errno.h>
41 #include <sys/resource.h>
42 #include <machine/limits.h>
43 #include <kern/ledger.h>
44 #include <kern/thread_call.h>
45 #include <kern/sfi.h>
46 #include <kern/coalition.h>
47 #if CONFIG_TELEMETRY
48 #include <kern/telemetry.h>
49 #endif
50 #if !defined(XNU_TARGET_OS_OSX)
51 #include <kern/kalloc.h>
52 #include <sys/errno.h>
53 #endif /* !defined(XNU_TARGET_OS_OSX) */
54
55 #if IMPORTANCE_INHERITANCE
56 #include <ipc/ipc_importance.h>
57 #if IMPORTANCE_TRACE
58 #include <mach/machine/sdt.h>
59 #endif /* IMPORTANCE_TRACE */
#endif /* IMPORTANCE_INHERITANCE */
61
62 #include <sys/kdebug.h>
63
64 /*
65 * Task Policy
66 *
67 * This subsystem manages task and thread IO priority and backgrounding,
68 * as well as importance inheritance, process suppression, task QoS, and apptype.
 * These properties have a surprising number of complex interactions, so they are
70 * centralized here in one state machine to simplify the implementation of those interactions.
71 *
72 * Architecture:
73 * Threads and tasks have two policy fields: requested, effective.
74 * Requested represents the wishes of each interface that influences task policy.
75 * Effective represents the distillation of that policy into a set of behaviors.
76 *
77 * Each thread making a modification in the policy system passes a 'pending' struct,
78 * which tracks updates that will be applied after dropping the policy engine lock.
79 *
80 * Each interface that has an input into the task policy state machine controls a field in requested.
81 * If the interface has a getter, it returns what is in the field in requested, but that is
82 * not necessarily what is actually in effect.
83 *
84 * All kernel subsystems that behave differently based on task policy call into
85 * the proc_get_effective_(task|thread)_policy functions, which return the decision of the task policy state machine
86 * for that subsystem by querying only the 'effective' field.
87 *
88 * Policy change operations:
89 * Here are the steps to change a policy on a task or thread:
90 * 1) Lock task
91 * 2) Change requested field for the relevant policy
92 * 3) Run a task policy update, which recalculates effective based on requested,
93 * then takes a diff between the old and new versions of requested and calls the relevant
94 * other subsystems to apply these changes, and updates the pending field.
95 * 4) Unlock task
96 * 5) Run task policy update complete, which looks at the pending field to update
97 * subsystems which cannot be touched while holding the task lock.
98 *
99 * To add a new requested policy, add the field in the requested struct, the flavor in task.h,
100 * the setter and getter in proc_(set|get)_task_policy*,
101 * then set up the effects of that behavior in task_policy_update*. If the policy manifests
102 * itself as a distinct effective policy, add it to the effective struct and add it to the
103 * proc_get_effective_task_policy accessor.
104 *
105 * Most policies are set via proc_set_task_policy, but policies that don't fit that interface
106 * roll their own lock/set/update/unlock/complete code inside this file.
107 *
108 *
109 * Suppression policy
110 *
111 * These are a set of behaviors that can be requested for a task. They currently have specific
112 * implied actions when they're enabled, but they may be made customizable in the future.
113 *
114 * When the affected task is boosted, we temporarily disable the suppression behaviors
115 * so that the affected process has a chance to run so it can call the API to permanently
116 * disable the suppression behaviors.
117 *
118 * Locking
119 *
120 * Changing task policy on a task takes the task lock.
121 * Changing task policy on a thread takes the thread mutex.
122 * Task policy changes that affect threads will take each thread's mutex to update it if necessary.
123 *
124 * Querying the effective policy does not take a lock, because callers
125 * may run in interrupt context or other place where locks are not OK.
126 *
127 * This means that any notification of state change needs to be externally synchronized.
128 * We do this by idempotent callouts after the state has changed to ask
129 * other subsystems to update their view of the world.
130 *
131 * TODO: Move all cpu/wakes/io monitor code into a separate file
132 * TODO: Move all importance code over to importance subsystem
133 * TODO: Move all taskwatch code into a separate file
134 * TODO: Move all VM importance code into a separate file
135 */
136
137 /* Task policy related helper functions */
138 static void proc_set_task_policy_locked(task_t task, int category, int flavor, int value, int value2);
139
140 static void task_policy_update_locked(task_t task, task_pend_token_t pend_token);
141 static void task_policy_update_internal_locked(task_t task, bool in_create, task_pend_token_t pend_token);
142
143 /* For attributes that have two scalars as input/output */
144 static void proc_set_task_policy2(task_t task, int category, int flavor, int value1, int value2);
145 static void proc_get_task_policy2(task_t task, int category, int flavor, int *value1, int *value2);
146
147 static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role, task_pend_token_t pend_token);
148
149 static uint64_t task_requested_bitfield(task_t task);
150 static uint64_t task_effective_bitfield(task_t task);
151
152 /* Convenience functions for munging a policy bitfield into a tracepoint */
153 static uintptr_t trequested_0(task_t task);
154 static uintptr_t trequested_1(task_t task);
155 static uintptr_t teffective_0(task_t task);
156 static uintptr_t teffective_1(task_t task);
157
158 /* CPU limits helper functions */
159 static int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
160 static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
161 static int task_enable_cpumon_locked(task_t task);
162 static int task_disable_cpumon(task_t task);
163 static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
164 static int task_apply_resource_actions(task_t task, int type);
165 static void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
166
167 #ifdef MACH_BSD
168 typedef struct proc * proc_t;
169 int proc_pid(struct proc *proc);
170 extern int proc_selfpid(void);
171 extern char * proc_name_address(void *p);
172 extern char * proc_best_name(proc_t proc);
173
174 extern int proc_pidpathinfo_internal(proc_t p, uint64_t arg,
175 char *buffer, uint32_t buffersize,
176 int32_t *retval);
177 #endif /* MACH_BSD */
178
179
180 #if CONFIG_TASKWATCH
181 /* Taskwatch related helper functions */
182 static void set_thread_appbg(thread_t thread, int setbg, int importance);
183 static void add_taskwatch_locked(task_t task, task_watch_t * twp);
184 static void remove_taskwatch_locked(task_t task, task_watch_t * twp);
185 static void task_watch_lock(void);
186 static void task_watch_unlock(void);
187 static void apply_appstate_watchers(task_t task);
188
189 typedef struct task_watcher {
190 queue_chain_t tw_links; /* queueing of threads */
191 task_t tw_task; /* task that is being watched */
192 thread_t tw_thread; /* thread that is watching the watch_task */
193 int tw_state; /* the current app state of the thread */
194 int tw_importance; /* importance prior to backgrounding */
195 } task_watch_t;
196
197 typedef struct thread_watchlist {
198 thread_t thread; /* thread being worked on for taskwatch action */
199 int importance; /* importance to be restored if thread is being made active */
200 } thread_watchlist_t;
201
202 #endif /* CONFIG_TASKWATCH */
203
204 extern int memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap);
205
206 /* Importance Inheritance related helper functions */
207
208 #if IMPORTANCE_INHERITANCE
209
210 static void task_importance_mark_live_donor(task_t task, boolean_t donating);
211 static void task_importance_mark_receiver(task_t task, boolean_t receiving);
212 static void task_importance_mark_denap_receiver(task_t task, boolean_t denap);
213
214 static boolean_t task_is_marked_live_importance_donor(task_t task);
215 static boolean_t task_is_importance_receiver(task_t task);
216 static boolean_t task_is_importance_denap_receiver(task_t task);
217
218 static int task_importance_hold_internal_assertion(task_t target_task, uint32_t count);
219
220 static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
221 static void task_importance_update_live_donor(task_t target_task);
222
223 static void task_set_boost_locked(task_t task, boolean_t boost_active);
224
225 #endif /* IMPORTANCE_INHERITANCE */
226
227 #if IMPORTANCE_TRACE
228 #define __imptrace_only
229 #else /* IMPORTANCE_TRACE */
230 #define __imptrace_only __unused
231 #endif /* !IMPORTANCE_TRACE */
232
233 #if IMPORTANCE_INHERITANCE
234 #define __imp_only
235 #else
236 #define __imp_only __unused
237 #endif
238
239 /*
240 * Default parameters for certain policies
241 */
242
243 int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
244 int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
245
246 int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF);
247
248 const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2;
249
250 /* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
251 const struct task_requested_policy default_task_requested_policy = {
252 .trp_bg_iotier = proc_default_bg_iotier
253 };
254 const struct task_effective_policy default_task_effective_policy = {};
255
256 /*
257 * Default parameters for CPU usage monitor.
258 *
259 * Default setting is 50% over 3 minutes.
260 */
261 #define DEFAULT_CPUMON_PERCENTAGE 50
262 #define DEFAULT_CPUMON_INTERVAL (3 * 60)
263
264 uint8_t proc_max_cpumon_percentage;
265 uint64_t proc_max_cpumon_interval;
266
267 kern_return_t
qos_latency_policy_validate(task_latency_qos_t ltier)268 qos_latency_policy_validate(task_latency_qos_t ltier)
269 {
270 if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
271 ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) {
272 return KERN_INVALID_ARGUMENT;
273 }
274
275 return KERN_SUCCESS;
276 }
277
278 kern_return_t
qos_throughput_policy_validate(task_throughput_qos_t ttier)279 qos_throughput_policy_validate(task_throughput_qos_t ttier)
280 {
281 if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
282 ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) {
283 return KERN_INVALID_ARGUMENT;
284 }
285
286 return KERN_SUCCESS;
287 }
288
289 static kern_return_t
task_qos_policy_validate(task_qos_policy_t qosinfo,mach_msg_type_number_t count)290 task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count)
291 {
292 if (count < TASK_QOS_POLICY_COUNT) {
293 return KERN_INVALID_ARGUMENT;
294 }
295
296 task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
297 task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
298
299 kern_return_t kr = qos_latency_policy_validate(ltier);
300
301 if (kr != KERN_SUCCESS) {
302 return kr;
303 }
304
305 kr = qos_throughput_policy_validate(ttier);
306
307 return kr;
308 }
309
/*
 * Strip the flavor packaging from a QoS value; the bare tier
 * lives in the low byte of the packaged word.
 */
uint32_t
qos_extract(uint32_t qv)
{
	return qv & 0xFFu;
}
315
316 uint32_t
qos_latency_policy_package(uint32_t qv)317 qos_latency_policy_package(uint32_t qv)
318 {
319 return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
320 }
321
322 uint32_t
qos_throughput_policy_package(uint32_t qv)323 qos_throughput_policy_package(uint32_t qv)
324 {
325 return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
326 }
327
#define TASK_POLICY_SUPPRESSION_DISABLE         0x1     /* ignore requests to engage suppression (see TASK_SUPPRESSION_POLICY setter) */
#define TASK_POLICY_SUPPRESSION_IOTIER2         0x2     /* NOTE(review): consumed outside this chunk; presumably selects IO tier 2 for suppressed tasks — confirm */
#define TASK_POLICY_SUPPRESSION_NONDONOR        0x4     /* NOTE(review): consumed outside this chunk; presumably suppressed tasks stop donating importance — confirm */
/* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */
static boolean_t task_policy_suppression_flags = TASK_POLICY_SUPPRESSION_IOTIER2 |
    TASK_POLICY_SUPPRESSION_NONDONOR;
334
/*
 * Record the task's requested apptype.
 * When thread groups are configured and the caller asked for it, also flag
 * the coalition's thread group as hosting an application if the new apptype
 * makes this task an app.
 */
static void
task_set_requested_apptype(task_t task, uint64_t apptype, __unused boolean_t update_tg_flag)
{
	task->requested_policy.trp_apptype = apptype;
#if CONFIG_THREAD_GROUPS
	if (update_tg_flag && task_is_app(task)) {
		task_coalition_thread_group_application_set(task);
	}
#endif /* CONFIG_THREAD_GROUPS */
}
345
/*
 * Mach interface for assigning task-wide scheduling/QoS policy.
 *
 * Validates the flavor-specific payload and then funnels the request into
 * the central policy engine via proc_set_task_policy*().  The suppression
 * flavor rolls its own lock/set/update/unlock/complete sequence because it
 * updates several requested-policy fields atomically.
 *
 * Returns KERN_INVALID_ARGUMENT for the kernel task, short payloads,
 * unknown flavors, or roles the caller may not assume.
 */
kern_return_t
task_policy_set(
	task_t task,
	task_policy_flavor_t flavor,
	task_policy_t policy_info,
	mach_msg_type_number_t count)
{
	kern_return_t result = KERN_SUCCESS;

	/* The kernel task's policy cannot be changed through this interface. */
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	switch (flavor) {
	case TASK_CATEGORY_POLICY: {
		task_category_policy_t info = (task_category_policy_t)policy_info;

		if (count < TASK_CATEGORY_POLICY_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

#if !defined(XNU_TARGET_OS_OSX)
		/* On embedded, you can't modify your own role. */
		if (current_task() == task) {
			return KERN_INVALID_ARGUMENT;
		}
#endif

		switch (info->role) {
		case TASK_FOREGROUND_APPLICATION:
		case TASK_BACKGROUND_APPLICATION:
		case TASK_DEFAULT_APPLICATION:
			/* Ordinary app roles route straight into the policy engine. */
			proc_set_task_policy(task,
			    TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
			    info->role);
			break;

		case TASK_CONTROL_APPLICATION:
			/* Only a privileged task may take the control role, and only for itself. */
			if (task != current_task() || !task_is_privileged(task)) {
				result = KERN_INVALID_ARGUMENT;
			} else {
				proc_set_task_policy(task,
				    TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
				    info->role);
			}
			break;

		case TASK_GRAPHICS_SERVER:
			/* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
			if (task != current_task() || !task_is_privileged(task)) {
				result = KERN_INVALID_ARGUMENT;
			} else {
				proc_set_task_policy(task,
				    TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
				    info->role);
			}
			break;
		default:
			result = KERN_INVALID_ARGUMENT;
			break;
		} /* switch (info->role) */

		break;
	}

	/* Desired energy-efficiency/performance "quality-of-service" */
	case TASK_BASE_QOS_POLICY:
	case TASK_OVERRIDE_QOS_POLICY:
	{
		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
		kern_return_t kr = task_qos_policy_validate(qosinfo, count);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		/* Strip flavor packaging; the engine stores bare tier values. */
		uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
		uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);

		proc_set_task_policy2(task, TASK_POLICY_ATTRIBUTE,
		    flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
		    lqos, tqos);
	}
	break;

	case TASK_BASE_LATENCY_QOS_POLICY:
	{
		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
		kern_return_t kr = task_qos_policy_validate(qosinfo, count);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);

		proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos);
	}
	break;

	case TASK_BASE_THROUGHPUT_QOS_POLICY:
	{
		task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
		kern_return_t kr = task_qos_policy_validate(qosinfo, count);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);

		proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos);
	}
	break;

	case TASK_SUPPRESSION_POLICY:
	{
#if !defined(XNU_TARGET_OS_OSX)
		/*
		 * Suppression policy is not enabled for embedded
		 * because apps aren't marked as denap receivers
		 */
		result = KERN_INVALID_ARGUMENT;
		break;
#else /* !defined(XNU_TARGET_OS_OSX) */

		task_suppression_policy_t info = (task_suppression_policy_t)policy_info;

		if (count < TASK_SUPPRESSION_POLICY_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		/* Reuse the QoS validator for the embedded timer/throughput tiers. */
		struct task_qos_policy qosinfo;

		qosinfo.task_latency_qos_tier = info->timer_throttle;
		qosinfo.task_throughput_qos_tier = info->throughput_qos;

		kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		/* TEMPORARY disablement of task suppression */
		if (info->active &&
		    (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_DISABLE)) {
			return KERN_SUCCESS;
		}

		struct task_pend_token pend_token = {};

		task_lock(task);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
		    proc_selfpid(), task_pid(task), trequested_0(task),
		    trequested_1(task), 0);

		/* Latch all requested suppression behaviors, then re-derive effective policy. */
		task->requested_policy.trp_sup_active = (info->active) ? 1 : 0;
		task->requested_policy.trp_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0;
		task->requested_policy.trp_sup_timer = qos_extract(info->timer_throttle);
		task->requested_policy.trp_sup_disk = (info->disk_throttle) ? 1 : 0;
		task->requested_policy.trp_sup_throughput = qos_extract(info->throughput_qos);
		task->requested_policy.trp_sup_cpu = (info->suppressed_cpu) ? 1 : 0;
		task->requested_policy.trp_sup_bg_sockets = (info->background_sockets) ? 1 : 0;

		task_policy_update_locked(task, &pend_token);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
		    proc_selfpid(), task_pid(task), trequested_0(task),
		    trequested_1(task), 0);

		task_unlock(task);

		/* Push the updates that can't be made while holding the task lock. */
		task_policy_update_complete_unlocked(task, &pend_token);

		break;

#endif /* !defined(XNU_TARGET_OS_OSX) */
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	return result;
}
536
/*
 * Sets BSD 'nice' value on the task.
 *
 * Rejects the kernel task, terminated tasks, and tasks whose effective role
 * is TASK_CONTROL_APPLICATION or above (those roles manage their own priority).
 * Stores the raw importance, then re-runs the policy engine so the value is
 * folded into the task's effective priority.
 */
kern_return_t
task_importance(
	task_t task,
	integer_t importance)
{
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	task_lock(task);

	/* A task that has begun termination can no longer change policy. */
	if (!task->active) {
		task_unlock(task);

		return KERN_TERMINATED;
	}

	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
		task_unlock(task);

		return KERN_INVALID_ARGUMENT;
	}

	task->importance = importance;

	struct task_pend_token pend_token = {};

	task_policy_update_locked(task, &pend_token);

	task_unlock(task);

	/* Apply the updates that couldn't be made under the task lock. */
	task_policy_update_complete_unlocked(task, &pend_token);

	return KERN_SUCCESS;
}
573
574 kern_return_t
task_policy_get(task_t task,task_policy_flavor_t flavor,task_policy_t policy_info,mach_msg_type_number_t * count,boolean_t * get_default)575 task_policy_get(
576 task_t task,
577 task_policy_flavor_t flavor,
578 task_policy_t policy_info,
579 mach_msg_type_number_t *count,
580 boolean_t *get_default)
581 {
582 if (task == TASK_NULL || task == kernel_task) {
583 return KERN_INVALID_ARGUMENT;
584 }
585
586 switch (flavor) {
587 case TASK_CATEGORY_POLICY:
588 {
589 task_category_policy_t info = (task_category_policy_t)policy_info;
590
591 if (*count < TASK_CATEGORY_POLICY_COUNT) {
592 return KERN_INVALID_ARGUMENT;
593 }
594
595 if (*get_default) {
596 info->role = TASK_UNSPECIFIED;
597 } else {
598 info->role = proc_get_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
599 }
600 break;
601 }
602
603 case TASK_BASE_QOS_POLICY: /* FALLTHRU */
604 case TASK_OVERRIDE_QOS_POLICY:
605 {
606 task_qos_policy_t info = (task_qos_policy_t)policy_info;
607
608 if (*count < TASK_QOS_POLICY_COUNT) {
609 return KERN_INVALID_ARGUMENT;
610 }
611
612 if (*get_default) {
613 info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
614 info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
615 } else if (flavor == TASK_BASE_QOS_POLICY) {
616 int value1, value2;
617
618 proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
619
620 info->task_latency_qos_tier = qos_latency_policy_package(value1);
621 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
622 } else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
623 int value1, value2;
624
625 proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
626
627 info->task_latency_qos_tier = qos_latency_policy_package(value1);
628 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
629 }
630
631 break;
632 }
633
634 case TASK_POLICY_STATE:
635 {
636 task_policy_state_t info = (task_policy_state_t)policy_info;
637
638 if (*count < TASK_POLICY_STATE_COUNT) {
639 return KERN_INVALID_ARGUMENT;
640 }
641
642 /* Only root can get this info */
643 if (!task_is_privileged(current_task())) {
644 return KERN_PROTECTION_FAILURE;
645 }
646
647 if (*get_default) {
648 info->requested = 0;
649 info->effective = 0;
650 info->pending = 0;
651 info->imp_assertcnt = 0;
652 info->imp_externcnt = 0;
653 info->flags = 0;
654 info->imp_transitions = 0;
655 } else {
656 task_lock(task);
657
658 info->requested = task_requested_bitfield(task);
659 info->effective = task_effective_bitfield(task);
660 info->pending = 0;
661
662 info->tps_requested_policy = *(uint64_t*)(&task->requested_policy);
663 info->tps_effective_policy = *(uint64_t*)(&task->effective_policy);
664
665 info->flags = 0;
666 if (task->task_imp_base != NULL) {
667 info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
668 info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
669 info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0);
670 info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0);
671 info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0);
672 info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0);
673 info->flags |= (get_task_pidsuspended(task) ? TASK_IS_PIDSUSPENDED : 0);
674 info->imp_transitions = task->task_imp_base->iit_transitions;
675 } else {
676 info->imp_assertcnt = 0;
677 info->imp_externcnt = 0;
678 info->imp_transitions = 0;
679 }
680 task_unlock(task);
681 }
682
683 break;
684 }
685
686 case TASK_SUPPRESSION_POLICY:
687 {
688 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
689
690 if (*count < TASK_SUPPRESSION_POLICY_COUNT) {
691 return KERN_INVALID_ARGUMENT;
692 }
693
694 task_lock(task);
695
696 if (*get_default) {
697 info->active = 0;
698 info->lowpri_cpu = 0;
699 info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED;
700 info->disk_throttle = 0;
701 info->cpu_limit = 0;
702 info->suspend = 0;
703 info->throughput_qos = 0;
704 info->suppressed_cpu = 0;
705 } else {
706 info->active = task->requested_policy.trp_sup_active;
707 info->lowpri_cpu = task->requested_policy.trp_sup_lowpri_cpu;
708 info->timer_throttle = qos_latency_policy_package(task->requested_policy.trp_sup_timer);
709 info->disk_throttle = task->requested_policy.trp_sup_disk;
710 info->cpu_limit = 0;
711 info->suspend = 0;
712 info->throughput_qos = qos_throughput_policy_package(task->requested_policy.trp_sup_throughput);
713 info->suppressed_cpu = task->requested_policy.trp_sup_cpu;
714 info->background_sockets = task->requested_policy.trp_sup_bg_sockets;
715 }
716
717 task_unlock(task);
718 break;
719 }
720
721 default:
722 return KERN_INVALID_ARGUMENT;
723 }
724
725 return KERN_SUCCESS;
726 }
727
/*
 * Called at task creation
 * We calculate the correct effective but don't apply it to anything yet.
 * The threads, etc will inherit from the task as they get created.
 */
void
task_policy_create(task_t task, task_t parent_task)
{
	/* Child starts with the parent's apptype and inheritable requested bits. */
	task_set_requested_apptype(task, parent_task->requested_policy.trp_apptype, true);

	task->requested_policy.trp_int_darwinbg = parent_task->requested_policy.trp_int_darwinbg;
	task->requested_policy.trp_ext_darwinbg = parent_task->requested_policy.trp_ext_darwinbg;
	task->requested_policy.trp_int_iotier = parent_task->requested_policy.trp_int_iotier;
	task->requested_policy.trp_ext_iotier = parent_task->requested_policy.trp_ext_iotier;
	task->requested_policy.trp_int_iopassive = parent_task->requested_policy.trp_int_iopassive;
	task->requested_policy.trp_ext_iopassive = parent_task->requested_policy.trp_ext_iopassive;
	task->requested_policy.trp_bg_iotier = parent_task->requested_policy.trp_bg_iotier;
	task->requested_policy.trp_terminated = parent_task->requested_policy.trp_terminated;
	task->requested_policy.trp_qos_clamp = parent_task->requested_policy.trp_qos_clamp;

	/*
	 * An adaptive daemon child resolves to interactive (importance donor)
	 * if the parent was boosted at fork time, otherwise to plain background
	 * daemon.  Exec-copy tasks keep their apptype unchanged.
	 */
	if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && !task_is_exec_copy(task)) {
		/* Do not update the apptype for exec copy task */
		if (parent_task->requested_policy.trp_boosted) {
			task_set_requested_apptype(task, TASK_APPTYPE_DAEMON_INTERACTIVE, true);
			task_importance_mark_donor(task, TRUE);
		} else {
			task_set_requested_apptype(task, TASK_APPTYPE_DAEMON_BACKGROUND, true);
			task_importance_mark_receiver(task, FALSE);
		}
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
	    task_pid(task), teffective_0(task),
	    teffective_1(task), task->priority, 0);

	/* in_create == true: compute effective policy without pushing side effects. */
	task_policy_update_internal_locked(task, true, NULL);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
	    task_pid(task), teffective_0(task),
	    teffective_1(task), task->priority, 0);

	task_importance_update_live_donor(task);
}
773
774
/*
 * Recompute the task's effective policy from its requested policy,
 * recording deferred side effects in pend_token.
 *
 * Caller holds the task lock and must call
 * task_policy_update_complete_unlocked() after dropping it.
 * The KDEBUG pair brackets the update for importance tracing.
 */
static void
task_policy_update_locked(task_t task, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK) | DBG_FUNC_START),
	    task_pid(task), teffective_0(task),
	    teffective_1(task), task->priority, 0);

	task_policy_update_internal_locked(task, false, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK)) | DBG_FUNC_END,
	    task_pid(task), teffective_0(task),
	    teffective_1(task), task->priority, 0);
}
790
791 /*
792 * One state update function TO RULE THEM ALL
793 *
794 * This function updates the task or thread effective policy fields
795 * and pushes the results to the relevant subsystems.
796 *
797 * Must call update_complete after unlocking the task,
798 * as some subsystems cannot be updated while holding the task lock.
799 *
800 * Called with task locked, not thread
801 */
802
803 static void
task_policy_update_internal_locked(task_t task,bool in_create,task_pend_token_t pend_token)804 task_policy_update_internal_locked(task_t task, bool in_create, task_pend_token_t pend_token)
805 {
806 /*
807 * Step 1:
808 * Gather requested policy
809 */
810
811 struct task_requested_policy requested = task->requested_policy;
812
813 /*
814 * Step 2:
815 * Calculate new effective policies from requested policy and task state
816 * Rules:
817 * Don't change requested, it won't take effect
818 */
819
820 struct task_effective_policy next = {};
821
822 /* Update task role */
823 next.tep_role = requested.trp_role;
824
825 /* Set task qos clamp and ceiling */
826
827 thread_qos_t role_clamp = THREAD_QOS_UNSPECIFIED;
828
829 if (requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT) {
830 switch (next.tep_role) {
831 case TASK_FOREGROUND_APPLICATION:
832 /* Foreground apps get urgent scheduler priority */
833 next.tep_qos_ui_is_urgent = 1;
834 next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
835 break;
836
837 case TASK_BACKGROUND_APPLICATION:
838 /* This is really 'non-focal but on-screen' */
839 next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
840 break;
841
842 case TASK_DEFAULT_APPLICATION:
843 /* This is 'may render UI but we don't know if it's focal/nonfocal' */
844 next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
845 break;
846
847 case TASK_NONUI_APPLICATION:
848 /* i.e. 'off-screen' */
849 next.tep_qos_ceiling = THREAD_QOS_LEGACY;
850 break;
851
852 case TASK_CONTROL_APPLICATION:
853 case TASK_GRAPHICS_SERVER:
854 next.tep_qos_ui_is_urgent = 1;
855 next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
856 break;
857
858 case TASK_THROTTLE_APPLICATION:
859 /* i.e. 'TAL launch' */
860 next.tep_qos_ceiling = THREAD_QOS_UTILITY;
861 role_clamp = THREAD_QOS_UTILITY;
862 break;
863
864 case TASK_DARWINBG_APPLICATION:
865 /* i.e. 'DARWIN_BG throttled background application' */
866 next.tep_qos_ceiling = THREAD_QOS_BACKGROUND;
867 break;
868
869 case TASK_UNSPECIFIED:
870 default:
871 /* Apps that don't have an application role get
872 * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
873 next.tep_qos_ceiling = THREAD_QOS_LEGACY;
874 break;
875 }
876 } else {
877 /* Daemons and dext get USER_INTERACTIVE squashed to USER_INITIATED */
878 next.tep_qos_ceiling = THREAD_QOS_USER_INITIATED;
879 }
880
881 if (role_clamp != THREAD_QOS_UNSPECIFIED) {
882 if (requested.trp_qos_clamp != THREAD_QOS_UNSPECIFIED) {
883 next.tep_qos_clamp = MIN(role_clamp, requested.trp_qos_clamp);
884 } else {
885 next.tep_qos_clamp = role_clamp;
886 }
887 } else {
888 next.tep_qos_clamp = requested.trp_qos_clamp;
889 }
890
891 /* Calculate DARWIN_BG */
892 bool wants_darwinbg = false;
893 bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */
894 bool wants_watchersbg = false; /* Do I want my pidbound threads to be bg */
895 bool adaptive_bg_only = false; /* This task is BG only because it's adaptive unboosted */
896
897 /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
898 if (requested.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
899 requested.trp_boosted == 0) {
900 wants_darwinbg = true;
901 adaptive_bg_only = true;
902 }
903
904 /*
905 * If DARWIN_BG has been requested at either level, it's engaged.
906 * Only true DARWIN_BG changes cause watchers to transition.
907 *
908 * Backgrounding due to apptype does.
909 */
910 if (requested.trp_int_darwinbg || requested.trp_ext_darwinbg ||
911 next.tep_role == TASK_DARWINBG_APPLICATION) {
912 wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = true;
913 adaptive_bg_only = false;
914 }
915
916 /* Application launching in special Transparent App Lifecycle throttle mode */
917 if (requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT &&
918 requested.trp_role == TASK_THROTTLE_APPLICATION) {
919 next.tep_tal_engaged = 1;
920 }
921
922 /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
923 if (requested.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
924 wants_darwinbg = true;
925 adaptive_bg_only = false;
926 }
927
928 if (next.tep_qos_clamp == THREAD_QOS_BACKGROUND ||
929 next.tep_qos_clamp == THREAD_QOS_MAINTENANCE) {
930 wants_darwinbg = true;
931 adaptive_bg_only = false;
932 }
933
934 /* Calculate side effects of DARWIN_BG */
935
936 if (wants_darwinbg) {
937 next.tep_darwinbg = 1;
938 /* darwinbg tasks always create bg sockets, but we don't always loop over all sockets */
939 next.tep_new_sockets_bg = 1;
940 next.tep_lowpri_cpu = 1;
941 }
942
943 if (wants_all_sockets_bg) {
944 next.tep_all_sockets_bg = 1;
945 }
946
947 if (wants_watchersbg) {
948 next.tep_watchers_bg = 1;
949 }
950
951 next.tep_adaptive_bg = adaptive_bg_only;
952
953 /* Calculate low CPU priority */
954
955 boolean_t wants_lowpri_cpu = false;
956
957 if (wants_darwinbg) {
958 wants_lowpri_cpu = true;
959 }
960
961 if (requested.trp_sup_lowpri_cpu && requested.trp_boosted == 0) {
962 wants_lowpri_cpu = true;
963 }
964
965 if (wants_lowpri_cpu) {
966 next.tep_lowpri_cpu = 1;
967 }
968
969 /* Calculate IO policy */
970
971 /* Update BG IO policy (so we can see if it has changed) */
972 next.tep_bg_iotier = requested.trp_bg_iotier;
973
974 int iopol = THROTTLE_LEVEL_TIER0;
975
976 if (wants_darwinbg) {
977 iopol = MAX(iopol, requested.trp_bg_iotier);
978 }
979
980 if (requested.trp_apptype == TASK_APPTYPE_DAEMON_STANDARD) {
981 iopol = MAX(iopol, proc_standard_daemon_tier);
982 }
983
984 if (requested.trp_sup_disk && requested.trp_boosted == 0) {
985 iopol = MAX(iopol, proc_suppressed_disk_tier);
986 }
987
988 if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
989 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.tep_qos_clamp]);
990 }
991
992 iopol = MAX(iopol, requested.trp_int_iotier);
993 iopol = MAX(iopol, requested.trp_ext_iotier);
994
995 next.tep_io_tier = iopol;
996
997 /* Calculate Passive IO policy */
998
999 if (requested.trp_ext_iopassive || requested.trp_int_iopassive) {
1000 next.tep_io_passive = 1;
1001 }
1002
1003 /* Calculate suppression-active flag */
1004 boolean_t appnap_transition = false;
1005
1006 if (requested.trp_sup_active && requested.trp_boosted == 0) {
1007 next.tep_sup_active = 1;
1008 }
1009
1010 if (task->effective_policy.tep_sup_active != next.tep_sup_active) {
1011 appnap_transition = true;
1012 }
1013
1014 /* Calculate timer QOS */
1015 int latency_qos = requested.trp_base_latency_qos;
1016
1017 if (requested.trp_sup_timer && requested.trp_boosted == 0) {
1018 latency_qos = requested.trp_sup_timer;
1019 }
1020
1021 if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1022 latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.tep_qos_clamp]);
1023 }
1024
1025 if (requested.trp_over_latency_qos != 0) {
1026 latency_qos = requested.trp_over_latency_qos;
1027 }
1028
1029 /* Treat the windowserver special */
1030 if (requested.trp_role == TASK_GRAPHICS_SERVER) {
1031 latency_qos = proc_graphics_timer_qos;
1032 }
1033
1034 next.tep_latency_qos = latency_qos;
1035
1036 /* Calculate throughput QOS */
1037 int through_qos = requested.trp_base_through_qos;
1038
1039 if (requested.trp_sup_throughput && requested.trp_boosted == 0) {
1040 through_qos = requested.trp_sup_throughput;
1041 }
1042
1043 if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1044 through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.tep_qos_clamp]);
1045 }
1046
1047 if (requested.trp_over_through_qos != 0) {
1048 through_qos = requested.trp_over_through_qos;
1049 }
1050
1051 next.tep_through_qos = through_qos;
1052
1053 /* Calculate suppressed CPU priority */
1054 if (requested.trp_sup_cpu && requested.trp_boosted == 0) {
1055 next.tep_suppressed_cpu = 1;
1056 }
1057
1058 /*
1059 * Calculate background sockets
1060 * Don't take into account boosting to limit transition frequency.
1061 */
1062 if (requested.trp_sup_bg_sockets) {
1063 next.tep_all_sockets_bg = 1;
1064 next.tep_new_sockets_bg = 1;
1065 }
1066
1067 /* Apply SFI Managed class bit */
1068 next.tep_sfi_managed = requested.trp_sfi_managed;
1069
1070 /* Calculate 'live donor' status for live importance */
1071 switch (requested.trp_apptype) {
1072 case TASK_APPTYPE_APP_TAL:
1073 case TASK_APPTYPE_APP_DEFAULT:
1074 if (requested.trp_ext_darwinbg == 1 ||
1075 (next.tep_sup_active == 1 &&
1076 (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_NONDONOR)) ||
1077 next.tep_role == TASK_DARWINBG_APPLICATION) {
1078 next.tep_live_donor = 0;
1079 } else {
1080 next.tep_live_donor = 1;
1081 }
1082 break;
1083
1084 case TASK_APPTYPE_DAEMON_INTERACTIVE:
1085 case TASK_APPTYPE_DAEMON_STANDARD:
1086 case TASK_APPTYPE_DAEMON_ADAPTIVE:
1087 case TASK_APPTYPE_DAEMON_BACKGROUND:
1088 case TASK_APPTYPE_DRIVER:
1089 default:
1090 next.tep_live_donor = 0;
1091 break;
1092 }
1093
1094 if (requested.trp_terminated) {
1095 /*
1096 * Shoot down the throttles that slow down exit or response to SIGTERM
1097 * We don't need to shoot down:
1098 * passive (don't want to cause others to throttle)
1099 * all_sockets_bg (don't need to iterate FDs on every exit)
1100 * new_sockets_bg (doesn't matter for exiting process)
1101 * pidsuspend (jetsam-ed BG process shouldn't run again)
1102 * watchers_bg (watcher threads don't need to be unthrottled)
1103 * latency_qos (affects userspace timers only)
1104 */
1105
1106 next.tep_terminated = 1;
1107 next.tep_darwinbg = 0;
1108 next.tep_lowpri_cpu = 0;
1109 next.tep_io_tier = THROTTLE_LEVEL_TIER0;
1110 next.tep_tal_engaged = 0;
1111 next.tep_role = TASK_UNSPECIFIED;
1112 next.tep_suppressed_cpu = 0;
1113 }
1114
1115 /*
1116 * Step 3:
1117 * Swap out old policy for new policy
1118 */
1119
1120 struct task_effective_policy prev = task->effective_policy;
1121
1122 /* This is the point where the new values become visible to other threads */
1123 task->effective_policy = next;
1124
1125 /* Don't do anything further to a half-formed task */
1126 if (in_create) {
1127 return;
1128 }
1129
1130 if (task == kernel_task) {
1131 panic("Attempting to set task policy on kernel_task");
1132 }
1133
1134 /*
1135 * Step 4:
1136 * Pend updates that can't be done while holding the task lock
1137 */
1138
1139 if (prev.tep_all_sockets_bg != next.tep_all_sockets_bg) {
1140 pend_token->tpt_update_sockets = 1;
1141 }
1142
1143 /* Only re-scan the timer list if the qos level is getting less strong */
1144 if (prev.tep_latency_qos > next.tep_latency_qos) {
1145 pend_token->tpt_update_timers = 1;
1146 }
1147
1148 #if CONFIG_TASKWATCH
1149 if (prev.tep_watchers_bg != next.tep_watchers_bg) {
1150 pend_token->tpt_update_watchers = 1;
1151 }
1152 #endif /* CONFIG_TASKWATCH */
1153
1154 if (prev.tep_live_donor != next.tep_live_donor) {
1155 pend_token->tpt_update_live_donor = 1;
1156 }
1157
1158 /*
1159 * Step 5:
1160 * Update other subsystems as necessary if something has changed
1161 */
1162
1163 bool update_threads = false, update_sfi = false;
1164
1165 /*
1166 * Check for the attributes that thread_policy_update_internal_locked() consults,
1167 * and trigger thread policy re-evaluation.
1168 */
1169 if (prev.tep_io_tier != next.tep_io_tier ||
1170 prev.tep_bg_iotier != next.tep_bg_iotier ||
1171 prev.tep_io_passive != next.tep_io_passive ||
1172 prev.tep_darwinbg != next.tep_darwinbg ||
1173 prev.tep_qos_clamp != next.tep_qos_clamp ||
1174 prev.tep_qos_ceiling != next.tep_qos_ceiling ||
1175 prev.tep_qos_ui_is_urgent != next.tep_qos_ui_is_urgent ||
1176 prev.tep_latency_qos != next.tep_latency_qos ||
1177 prev.tep_through_qos != next.tep_through_qos ||
1178 prev.tep_lowpri_cpu != next.tep_lowpri_cpu ||
1179 prev.tep_new_sockets_bg != next.tep_new_sockets_bg ||
1180 prev.tep_terminated != next.tep_terminated ||
1181 prev.tep_adaptive_bg != next.tep_adaptive_bg) {
1182 update_threads = true;
1183 }
1184
1185 /*
1186 * Check for the attributes that sfi_thread_classify() consults,
1187 * and trigger SFI re-evaluation.
1188 */
1189 if (prev.tep_latency_qos != next.tep_latency_qos ||
1190 prev.tep_role != next.tep_role ||
1191 prev.tep_sfi_managed != next.tep_sfi_managed) {
1192 update_sfi = true;
1193 }
1194
1195 /* Reflect task role transitions into the coalition role counters */
1196 if (prev.tep_role != next.tep_role) {
1197 if (task_policy_update_coalition_focal_tasks(task, prev.tep_role, next.tep_role, pend_token)) {
1198 update_sfi = true;
1199 }
1200 }
1201
1202 bool update_priority = false;
1203
1204 int16_t priority = BASEPRI_DEFAULT;
1205 int16_t max_priority = MAXPRI_USER;
1206
1207 if (next.tep_lowpri_cpu) {
1208 priority = MAXPRI_THROTTLE;
1209 max_priority = MAXPRI_THROTTLE;
1210 } else if (next.tep_suppressed_cpu) {
1211 priority = MAXPRI_SUPPRESSED;
1212 max_priority = MAXPRI_SUPPRESSED;
1213 } else {
1214 switch (next.tep_role) {
1215 case TASK_CONTROL_APPLICATION:
1216 priority = BASEPRI_CONTROL;
1217 break;
1218 case TASK_GRAPHICS_SERVER:
1219 priority = BASEPRI_GRAPHICS;
1220 max_priority = MAXPRI_RESERVED;
1221 break;
1222 default:
1223 break;
1224 }
1225
1226 /* factor in 'nice' value */
1227 priority += task->importance;
1228
1229 if (task->effective_policy.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1230 int16_t qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.tep_qos_clamp];
1231
1232 priority = MIN(priority, qos_clamp_priority);
1233 max_priority = MIN(max_priority, qos_clamp_priority);
1234 }
1235
1236 if (priority > max_priority) {
1237 priority = max_priority;
1238 } else if (priority < MINPRI) {
1239 priority = MINPRI;
1240 }
1241 }
1242
1243 assert(priority <= max_priority);
1244
1245 /* avoid extra work if priority isn't changing */
1246 if (priority != task->priority ||
1247 max_priority != task->max_priority) {
1248 /* update the scheduling priority for the task */
1249 task->max_priority = max_priority;
1250 task->priority = priority;
1251 update_priority = true;
1252 }
1253
1254 /* Loop over the threads in the task:
1255 * only once
1256 * only if necessary
1257 * with one thread mutex hold per thread
1258 */
1259 if (update_threads || update_priority || update_sfi) {
1260 thread_t thread;
1261
1262 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1263 struct task_pend_token thread_pend_token = {};
1264
1265 if (update_sfi) {
1266 thread_pend_token.tpt_update_thread_sfi = 1;
1267 }
1268
1269 if (update_priority || update_threads) {
1270 thread_policy_update_tasklocked(thread,
1271 task->priority, task->max_priority,
1272 &thread_pend_token);
1273 }
1274
1275 assert(!thread_pend_token.tpt_update_sockets);
1276
1277 // Slightly risky, as we still hold the task lock...
1278 thread_policy_update_complete_unlocked(thread, &thread_pend_token);
1279 }
1280 }
1281
1282 /*
1283 * Use the app-nap transitions to influence the
1284 * transition of the process within the jetsam band
1285 * [and optionally its live-donor status]
1286 * On macOS only.
1287 */
1288 if (appnap_transition) {
1289 if (task->effective_policy.tep_sup_active == 1) {
1290 memorystatus_update_priority_for_appnap(((proc_t) get_bsdtask_info(task)), TRUE);
1291 } else {
1292 memorystatus_update_priority_for_appnap(((proc_t) get_bsdtask_info(task)), FALSE);
1293 }
1294 }
1295 }
1296
1297
1298 /*
1299 * Yet another layering violation. We reach out and bang on the coalition directly.
1300 */
static boolean_t
task_policy_update_coalition_focal_tasks(task_t task,
    int prev_role,
    int next_role,
    task_pend_token_t pend_token)
{
	boolean_t sfi_transition = FALSE;
	uint32_t new_count = 0;

	/* task moving into/out-of the foreground */
	if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
		/*
		 * Coalition went from zero to one focal task: the whole
		 * coalition's SFI classification and thread-group UI flag
		 * may change.
		 */
		if (task_coalition_adjust_focal_count(task, 1, &new_count) && (new_count == 1)) {
			sfi_transition = TRUE;
			pend_token->tpt_update_tg_ui_flag = TRUE;
		}
	} else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
		/* Coalition dropped to zero focal tasks */
		if (task_coalition_adjust_focal_count(task, -1, &new_count) && (new_count == 0)) {
			sfi_transition = TRUE;
			pend_token->tpt_update_tg_ui_flag = TRUE;
		}
	}

	/* task moving into/out-of background */
	if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
		/* Coalition went from zero to one nonfocal task */
		if (task_coalition_adjust_nonfocal_count(task, 1, &new_count) && (new_count == 1)) {
			sfi_transition = TRUE;
		}
	} else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
		/* Coalition dropped to zero nonfocal tasks */
		if (task_coalition_adjust_nonfocal_count(task, -1, &new_count) && (new_count == 0)) {
			sfi_transition = TRUE;
		}
	}

	/* Defer the coalition-wide SFI re-evaluation until the task lock is dropped */
	if (sfi_transition) {
		pend_token->tpt_update_coal_sfi = 1;
	}

	/* TRUE when the caller should also trigger SFI re-evaluation */
	return sfi_transition;
}
1339
1340 #if CONFIG_SCHED_SFI
1341
1342 /* coalition object is locked */
/*
 * Per-task callback for coalition_for_each_task(): re-run SFI
 * classification on every thread of the task.
 */
static void
task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
{
	thread_t thread;

	/* unused for now */
	(void)coal;

	/* skip the task we're re-evaluating on behalf of: it's already updated */
	if (task == (task_t)ctx) {
		return;
	}

	/* Take the task lock so the thread list is stable while we walk it */
	task_lock(task);

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		sfi_reevaluate(thread);
	}

	task_unlock(task);
}
1364 #endif /* CONFIG_SCHED_SFI */
1365
1366 /*
1367 * Called with task unlocked to do things that can't be done while holding the task lock
1368 */
void
task_policy_update_complete_unlocked(task_t task, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Push the new background state down to the proc's existing sockets */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(task), THREAD_NULL);
	}
#endif /* MACH_BSD */

	/* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */
	if (pend_token->tpt_update_timers) {
		ml_timer_evaluate();
	}

#if CONFIG_TASKWATCH
	/* Propagate the new app state to threads pid-bound to this task */
	if (pend_token->tpt_update_watchers) {
		apply_appstate_watchers(task);
	}
#endif /* CONFIG_TASKWATCH */

	/* The task's live importance-donor status changed */
	if (pend_token->tpt_update_live_donor) {
		task_importance_update_live_donor(task);
	}

#if CONFIG_SCHED_SFI
	/* use the resource coalition for SFI re-evaluation */
	if (pend_token->tpt_update_coal_sfi) {
		coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE],
		    (void *)task, task_sfi_reevaluate_cb);
	}
#endif /* CONFIG_SCHED_SFI */

#if CONFIG_THREAD_GROUPS
	/* Coalition focal (UI) state changed; update the thread group's flag */
	if (pend_token->tpt_update_tg_ui_flag) {
		task_coalition_thread_group_focal_update(task);
	}
	/* Coalition application state changed; update the thread group's flag */
	if (pend_token->tpt_update_tg_app_flag) {
		task_coalition_thread_group_application_set(task);
	}
#endif /* CONFIG_THREAD_GROUPS */
}
1410
1411 /*
1412 * Initiate a task policy state transition
1413 *
1414 * Everything that modifies requested except functions that need to hold the task lock
1415 * should use this function
1416 *
1417 * Argument validation should be performed before reaching this point.
1418 *
1419 * TODO: Do we need to check task->active?
1420 */
void
proc_set_task_policy(task_t task,
    int category,
    int flavor,
    int value)
{
	struct task_pend_token pend_token = {};

	task_lock(task);

	/* Trace the pre-update requested state */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START,
	    task_pid(task), trequested_0(task),
	    trequested_1(task), value, 0);

	/* Record the new requested value for this (category, flavor) pair... */
	proc_set_task_policy_locked(task, category, flavor, value, 0);

	/* ...then recompute effective policy, accumulating deferred work in pend_token */
	task_policy_update_locked(task, &pend_token);


	/* Trace the post-update requested state and pending work */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END,
	    task_pid(task), trequested_0(task),
	    trequested_1(task), tpending(&pend_token), 0);

	task_unlock(task);

	/* Finish the updates that couldn't be performed under the task lock */
	task_policy_update_complete_unlocked(task, &pend_token);
}
1450
1451 /*
1452 * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1453 * Same locking rules apply.
1454 */
void
proc_set_task_policy2(task_t task,
    int category,
    int flavor,
    int value,
    int value2)
{
	struct task_pend_token pend_token = {};

	task_lock(task);

	/* Trace the pre-update requested state */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START,
	    task_pid(task), trequested_0(task),
	    trequested_1(task), value, 0);

	/* Record both requested values for this flavor... */
	proc_set_task_policy_locked(task, category, flavor, value, value2);

	/* ...then recompute effective policy, accumulating deferred work in pend_token */
	task_policy_update_locked(task, &pend_token);

	/* Trace the post-update requested state and pending work */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END,
	    task_pid(task), trequested_0(task),
	    trequested_1(task), tpending(&pend_token), 0);

	task_unlock(task);

	/* Finish the updates that couldn't be performed under the task lock */
	task_policy_update_complete_unlocked(task, &pend_token);
}
1484
1485 /*
1486 * Set the requested state for a specific flavor to a specific value.
1487 *
1488 * TODO:
1489 * Verify that arguments to non iopol things are 1 or 0
1490 */
static void
proc_set_task_policy_locked(task_t task,
    int category,
    int flavor,
    int value,
    int value2)
{
	int tier, passive;

	/* Work on a local copy; publish it with a single store at the end */
	struct task_requested_policy requested = task->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.trp_ext_darwinbg = value;
		} else {
			requested.trp_int_darwinbg = value;
		}
		break;

	case TASK_POLICY_IOPOL:
		/* value is an IOPOL_* constant; convert to throttle tier + passive bit */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.trp_ext_iotier = tier;
			requested.trp_ext_iopassive = passive;
		} else {
			requested.trp_int_iotier = tier;
			requested.trp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		/* value is already a throttle tier */
		if (category == TASK_POLICY_EXTERNAL) {
			requested.trp_ext_iotier = value;
		} else {
			requested.trp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.trp_ext_iopassive = value;
		} else {
			requested.trp_int_iopassive = value;
		}
		break;

	/* Category: INTERNAL */

	case TASK_POLICY_DARWIN_BG_IOPOL:
		assert(category == TASK_POLICY_INTERNAL);
		/* only the tier matters here; the passive bit is discarded */
		proc_iopol_to_tier(value, &tier, &passive);
		requested.trp_bg_iotier = tier;
		break;

	/* Category: ATTRIBUTE */

	case TASK_POLICY_BOOST:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_boosted = value;
		break;

	case TASK_POLICY_ROLE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_role = value;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_terminated = value;
		break;

	case TASK_BASE_LATENCY_QOS_POLICY:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_base_latency_qos = value;
		break;

	case TASK_BASE_THROUGHPUT_QOS_POLICY:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_base_through_qos = value;
		break;

	case TASK_POLICY_SFI_MANAGED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_sfi_managed = value;
		break;

	/* Two-value flavors: value = latency QoS, value2 = throughput QoS */

	case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_base_latency_qos = value;
		requested.trp_base_through_qos = value2;
		break;

	case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.trp_over_latency_qos = value;
		requested.trp_over_through_qos = value2;
		break;

	default:
		panic("unknown task policy: %d %d %d %d", category, flavor, value, value2);
		break;
	}

	task->requested_policy = requested;
}
1599
1600 /*
1601 * Gets what you set. Effective values may be different.
1602 */
int
proc_get_task_policy(task_t task,
    int category,
    int flavor)
{
	int value = 0;

	task_lock(task);

	/* Snapshot the requested policy under the task lock */
	struct task_requested_policy requested = task->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.trp_ext_darwinbg;
		} else {
			value = requested.trp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* convert tier + passive back to the IOPOL_* constant the caller set */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.trp_ext_iotier,
			    requested.trp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.trp_int_iotier,
			    requested.trp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.trp_ext_iotier;
		} else {
			value = requested.trp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.trp_ext_iopassive;
		} else {
			value = requested.trp_int_iopassive;
		}
		break;
	case TASK_POLICY_DARWIN_BG_IOPOL:
		assert(category == TASK_POLICY_INTERNAL);
		value = proc_tier_to_iopol(requested.trp_bg_iotier, 0);
		break;
	case TASK_POLICY_ROLE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.trp_role;
		break;
	case TASK_POLICY_SFI_MANAGED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.trp_sfi_managed;
		break;
	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	task_unlock(task);

	return value;
}
1666
1667 /*
1668 * Variant of proc_get_task_policy() that returns two scalar outputs.
1669 */
void
proc_get_task_policy2(task_t task,
    __assert_only int category,
    int flavor,
    int *value1,
    int *value2)
{
	task_lock(task);

	/* Snapshot the requested policy under the task lock */
	struct task_requested_policy requested = task->requested_policy;

	switch (flavor) {
	/* value1 = latency QoS, value2 = throughput QoS */
	case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		*value1 = requested.trp_base_latency_qos;
		*value2 = requested.trp_base_through_qos;
		break;

	case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		*value1 = requested.trp_over_latency_qos;
		*value2 = requested.trp_over_through_qos;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	task_unlock(task);
}
1701
1702 /*
1703 * Function for querying effective state for relevant subsystems
1704 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1705 *
1706 * ONLY the relevant subsystem should query this.
1707 * NEVER take a value from the 'effective' function and stuff it into a setter.
1708 *
1709 * NOTE: This accessor does not take the task lock.
1710 * Notifications of state updates need to be externally synchronized with state queries.
1711 * This routine *MUST* remain interrupt safe, as it is potentially invoked
1712 * within the context of a timer interrupt. It is also called in KDP context for stackshot.
1713 */
int
proc_get_effective_task_policy(task_t task,
    int flavor)
{
	int value = 0;

	/*
	 * NOTE: reads effective_policy without the task lock; see the
	 * interrupt-safety/synchronization caveats in the comment above.
	 */
	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This backs the KPI call proc_pidbackgrounded to find
		 * out if a pid is backgrounded.
		 * It is used to communicate state to the VM system, as well as
		 * prioritizing requests to the graphics system.
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = task->effective_policy.tep_darwinbg;
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine what it should do to the proc's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 *
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = task->effective_policy.tep_all_sockets_bg;
		break;
	case TASK_POLICY_SUP_ACTIVE:
		/*
		 * Is the task in AppNap? This is used to determine the urgency
		 * that's passed to the performance management subsystem for threads
		 * that are running at a priority <= MAXPRI_THROTTLE.
		 */
		value = task->effective_policy.tep_sup_active;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a QoS tier (0-6)
		 */
		value = task->effective_policy.tep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 * Returns a QoS tier (0-6)
		 */
		value = task->effective_policy.tep_through_qos;
		break;
	case TASK_POLICY_ROLE:
		/*
		 * This controls various things that ask whether a process is foreground,
		 * like SFI, VM, access to GPU, etc
		 */
		value = task->effective_policy.tep_role;
		break;
	case TASK_POLICY_WATCHERS_BG:
		/*
		 * This controls whether or not a thread watching this process should be BG.
		 */
		value = task->effective_policy.tep_watchers_bg;
		break;
	case TASK_POLICY_SFI_MANAGED:
		/*
		 * This controls whether or not a process is targeted for specific control by thermald.
		 */
		value = task->effective_policy.tep_sfi_managed;
		break;
	default:
		/* unknown flavors are a programming error, not a runtime condition */
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
1790
1791 /*
1792 * Convert from IOPOL_* values to throttle tiers.
1793 *
1794 * TODO: Can this be made more compact, like an array lookup
1795 * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
1796 */
1797
1798 void
proc_iopol_to_tier(int iopolicy,int * tier,int * passive)1799 proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
1800 {
1801 *passive = 0;
1802 *tier = 0;
1803 switch (iopolicy) {
1804 case IOPOL_IMPORTANT:
1805 *tier = THROTTLE_LEVEL_TIER0;
1806 break;
1807 case IOPOL_PASSIVE:
1808 *tier = THROTTLE_LEVEL_TIER0;
1809 *passive = 1;
1810 break;
1811 case IOPOL_STANDARD:
1812 *tier = THROTTLE_LEVEL_TIER1;
1813 break;
1814 case IOPOL_UTILITY:
1815 *tier = THROTTLE_LEVEL_TIER2;
1816 break;
1817 case IOPOL_THROTTLE:
1818 *tier = THROTTLE_LEVEL_TIER3;
1819 break;
1820 default:
1821 panic("unknown I/O policy %d", iopolicy);
1822 break;
1823 }
1824 }
1825
1826 int
proc_tier_to_iopol(int tier,int passive)1827 proc_tier_to_iopol(int tier, int passive)
1828 {
1829 if (passive == 1) {
1830 switch (tier) {
1831 case THROTTLE_LEVEL_TIER0:
1832 return IOPOL_PASSIVE;
1833 default:
1834 panic("unknown passive tier %d", tier);
1835 return IOPOL_DEFAULT;
1836 }
1837 } else {
1838 switch (tier) {
1839 case THROTTLE_LEVEL_NONE:
1840 case THROTTLE_LEVEL_TIER0:
1841 return IOPOL_DEFAULT;
1842 case THROTTLE_LEVEL_TIER1:
1843 return IOPOL_STANDARD;
1844 case THROTTLE_LEVEL_TIER2:
1845 return IOPOL_UTILITY;
1846 case THROTTLE_LEVEL_TIER3:
1847 return IOPOL_THROTTLE;
1848 default:
1849 panic("unknown tier %d", tier);
1850 return IOPOL_DEFAULT;
1851 }
1852 }
1853 }
1854
1855 int
proc_darwin_role_to_task_role(int darwin_role,task_role_t * task_role)1856 proc_darwin_role_to_task_role(int darwin_role, task_role_t* task_role)
1857 {
1858 integer_t role = TASK_UNSPECIFIED;
1859
1860 switch (darwin_role) {
1861 case PRIO_DARWIN_ROLE_DEFAULT:
1862 role = TASK_UNSPECIFIED;
1863 break;
1864 case PRIO_DARWIN_ROLE_UI_FOCAL:
1865 role = TASK_FOREGROUND_APPLICATION;
1866 break;
1867 case PRIO_DARWIN_ROLE_UI:
1868 role = TASK_DEFAULT_APPLICATION;
1869 break;
1870 case PRIO_DARWIN_ROLE_NON_UI:
1871 role = TASK_NONUI_APPLICATION;
1872 break;
1873 case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
1874 role = TASK_BACKGROUND_APPLICATION;
1875 break;
1876 case PRIO_DARWIN_ROLE_TAL_LAUNCH:
1877 role = TASK_THROTTLE_APPLICATION;
1878 break;
1879 case PRIO_DARWIN_ROLE_DARWIN_BG:
1880 role = TASK_DARWINBG_APPLICATION;
1881 break;
1882 default:
1883 return EINVAL;
1884 }
1885
1886 *task_role = role;
1887
1888 return 0;
1889 }
1890
1891 int
proc_task_role_to_darwin_role(task_role_t task_role)1892 proc_task_role_to_darwin_role(task_role_t task_role)
1893 {
1894 switch (task_role) {
1895 case TASK_FOREGROUND_APPLICATION:
1896 return PRIO_DARWIN_ROLE_UI_FOCAL;
1897 case TASK_BACKGROUND_APPLICATION:
1898 return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
1899 case TASK_NONUI_APPLICATION:
1900 return PRIO_DARWIN_ROLE_NON_UI;
1901 case TASK_DEFAULT_APPLICATION:
1902 return PRIO_DARWIN_ROLE_UI;
1903 case TASK_THROTTLE_APPLICATION:
1904 return PRIO_DARWIN_ROLE_TAL_LAUNCH;
1905 case TASK_DARWINBG_APPLICATION:
1906 return PRIO_DARWIN_ROLE_DARWIN_BG;
1907 case TASK_UNSPECIFIED:
1908 default:
1909 return PRIO_DARWIN_ROLE_DEFAULT;
1910 }
1911 }
1912
1913
/* TODO: remove this variable when interactive daemon audit period is over */
/*
 * Boot-arg "imp_interactive_receiver" (default false): when set, interactive
 * daemons are marked as importance receivers in proc_set_task_spawnpolicy().
 */
static TUNABLE(bool, ipc_importance_interactive_receiver,
    "imp_interactive_receiver", false);
1917
/*
 * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
 *
 * Establishes the importance donor/receiver posture implied by the apptype,
 * wires up adaptive-daemon watchports, then records apptype/role/clamp in the
 * task's requested policy and runs a policy update.
 *
 * TODO: Make this function more table-driven instead of ad-hoc
 */
void
proc_set_task_spawnpolicy(task_t task, thread_t thread, int apptype, int qos_clamp, task_role_t role,
    ipc_port_t * portwatch_ports, uint32_t portwatch_count)
{
	struct task_pend_token pend_token = {};

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
	    task_pid(task), trequested_0(task), trequested_1(task),
	    apptype, 0);

	if (apptype != TASK_APPTYPE_NONE) {
		/*
		 * Reset the receiver and denap state inherited from the
		 * task's parent, but only if we are going to reset it via the
		 * provided apptype.
		 */
		if (task_is_importance_receiver(task)) {
			task_importance_mark_receiver(task, FALSE);
		}
		if (task_is_importance_denap_receiver(task)) {
			task_importance_mark_denap_receiver(task, FALSE);
		}
	}

	/* Establish the donor/receiver posture implied by the apptype. */
	switch (apptype) {
	case TASK_APPTYPE_APP_DEFAULT:
		/* Apps become donors via the 'live-donor' flag instead of the static donor flag */
		task_importance_mark_donor(task, FALSE);
		task_importance_mark_live_donor(task, TRUE);
		// importance_receiver == FALSE
#if defined(XNU_TARGET_OS_OSX)
		/* Apps are de-nap receivers on macOS for suppression behaviors */
		task_importance_mark_denap_receiver(task, TRUE);
#endif /* defined(XNU_TARGET_OS_OSX) */
		break;

	case TASK_APPTYPE_DAEMON_INTERACTIVE:
		task_importance_mark_donor(task, TRUE);
		task_importance_mark_live_donor(task, FALSE);
		// importance_denap_receiver == FALSE

		/*
		 * A boot arg controls whether interactive daemons are importance receivers.
		 * Normally, they are not. But for testing their behavior as an adaptive
		 * daemon, the boot-arg can be set.
		 *
		 * TODO: remove this when the interactive daemon audit period is over.
		 */
		task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
		break;

	case TASK_APPTYPE_DAEMON_STANDARD:
		task_importance_mark_donor(task, TRUE);
		task_importance_mark_live_donor(task, FALSE);
		// importance_denap_receiver == FALSE
		// importance_receiver == FALSE
		break;

	case TASK_APPTYPE_DAEMON_ADAPTIVE:
		task_importance_mark_donor(task, FALSE);
		task_importance_mark_live_donor(task, FALSE);
		task_importance_mark_receiver(task, TRUE);
		// importance_denap_receiver == FALSE
		break;

	case TASK_APPTYPE_DAEMON_BACKGROUND:
		task_importance_mark_donor(task, FALSE);
		task_importance_mark_live_donor(task, FALSE);
		// importance_denap_receiver == FALSE
		// importance_receiver == FALSE
		break;

	case TASK_APPTYPE_DRIVER:
		task_importance_mark_donor(task, FALSE);
		task_importance_mark_live_donor(task, FALSE);
		// importance_denap_receiver == FALSE
		// importance_receiver == FALSE
		break;

	case TASK_APPTYPE_NONE:
		break;
	}

	/* Adaptive daemons pick up importance boosts from their watched ports. */
	if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
		int portwatch_boosts = 0;

		for (uint32_t i = 0; i < portwatch_count; i++) {
			ipc_port_t port = NULL;

			if (IP_VALID(port = portwatch_ports[i])) {
				int boost = 0;
				task_add_importance_watchport(task, port, &boost);
				portwatch_boosts += boost;
			}
		}

		if (portwatch_boosts > 0) {
			task_importance_hold_internal_assertion(task, portwatch_boosts);
		}
	}

	/* Redirect the turnstile push of watchports to task */
	if (portwatch_count && portwatch_ports != NULL) {
		task_add_turnstile_watchports(task, thread, portwatch_ports, portwatch_count);
	}

	task_lock(task);

	if (apptype != TASK_APPTYPE_NONE) {
		task_set_requested_apptype(task, apptype, false);
		if (task_is_app(task)) {
			pend_token.tpt_update_tg_app_flag = 1;
		}
	}

#if !defined(XNU_TARGET_OS_OSX)
	/* Remove this after launchd starts setting it properly */
	if (apptype == TASK_APPTYPE_APP_DEFAULT && role == TASK_UNSPECIFIED) {
		task->requested_policy.trp_role = TASK_FOREGROUND_APPLICATION;
	} else
#endif
	if (role != TASK_UNSPECIFIED) {
		task->requested_policy.trp_role = (uint32_t)role;
	}

	if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
		task->requested_policy.trp_qos_clamp = qos_clamp;
	}

	task_policy_update_locked(task, &pend_token);

	task_unlock(task);

	/* Ensure the donor bit is updated to be in sync with the new live donor status */
	pend_token.tpt_update_live_donor = 1;

	task_policy_update_complete_unlocked(task, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
	    task_pid(task), trequested_0(task), trequested_1(task),
	    task_is_importance_receiver(task), 0);
}
2067
2068 /*
2069 * Inherit task role across exec
2070 */
2071 void
proc_inherit_task_role(task_t new_task,task_t old_task)2072 proc_inherit_task_role(task_t new_task,
2073 task_t old_task)
2074 {
2075 int role;
2076
2077 /* inherit the role from old task to new task */
2078 role = proc_get_task_policy(old_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
2079 proc_set_task_policy(new_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, role);
2080 }
2081
2082 extern void * XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
2083
2084 /*
2085 * Compute the default main thread qos for a task
2086 */
2087 thread_qos_t
task_compute_main_thread_qos(task_t task)2088 task_compute_main_thread_qos(task_t task)
2089 {
2090 thread_qos_t primordial_qos = THREAD_QOS_UNSPECIFIED;
2091
2092 thread_qos_t qos_clamp = task->requested_policy.trp_qos_clamp;
2093
2094 switch (task->requested_policy.trp_apptype) {
2095 case TASK_APPTYPE_APP_TAL:
2096 case TASK_APPTYPE_APP_DEFAULT:
2097 primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2098 break;
2099
2100 case TASK_APPTYPE_DAEMON_INTERACTIVE:
2101 case TASK_APPTYPE_DAEMON_STANDARD:
2102 case TASK_APPTYPE_DAEMON_ADAPTIVE:
2103 case TASK_APPTYPE_DRIVER:
2104 primordial_qos = THREAD_QOS_LEGACY;
2105 break;
2106
2107 case TASK_APPTYPE_DAEMON_BACKGROUND:
2108 primordial_qos = THREAD_QOS_BACKGROUND;
2109 break;
2110 }
2111
2112 if (get_bsdtask_info(task) == initproc) {
2113 /* PID 1 gets a special case */
2114 primordial_qos = MAX(primordial_qos, THREAD_QOS_USER_INITIATED);
2115 }
2116
2117 if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2118 if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2119 primordial_qos = MIN(qos_clamp, primordial_qos);
2120 } else {
2121 primordial_qos = qos_clamp;
2122 }
2123 }
2124
2125 return primordial_qos;
2126 }
2127
2128
2129 /* for process_policy to check before attempting to set */
2130 boolean_t
proc_task_is_tal(task_t task)2131 proc_task_is_tal(task_t task)
2132 {
2133 return (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
2134 }
2135
2136 int
task_get_apptype(task_t task)2137 task_get_apptype(task_t task)
2138 {
2139 return task->requested_policy.trp_apptype;
2140 }
2141
2142 boolean_t
task_is_daemon(task_t task)2143 task_is_daemon(task_t task)
2144 {
2145 switch (task->requested_policy.trp_apptype) {
2146 case TASK_APPTYPE_DAEMON_INTERACTIVE:
2147 case TASK_APPTYPE_DAEMON_STANDARD:
2148 case TASK_APPTYPE_DAEMON_ADAPTIVE:
2149 case TASK_APPTYPE_DAEMON_BACKGROUND:
2150 return TRUE;
2151 default:
2152 return FALSE;
2153 }
2154 }
2155
2156 bool
task_is_driver(task_t task)2157 task_is_driver(task_t task)
2158 {
2159 if (!task) {
2160 return FALSE;
2161 }
2162 return task->requested_policy.trp_apptype == TASK_APPTYPE_DRIVER;
2163 }
2164
2165 boolean_t
task_is_app(task_t task)2166 task_is_app(task_t task)
2167 {
2168 switch (task->requested_policy.trp_apptype) {
2169 case TASK_APPTYPE_APP_DEFAULT:
2170 case TASK_APPTYPE_APP_TAL:
2171 return TRUE;
2172 default:
2173 return FALSE;
2174 }
2175 }
2176
2177
2178 /* for telemetry */
2179 integer_t
task_grab_latency_qos(task_t task)2180 task_grab_latency_qos(task_t task)
2181 {
2182 return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
2183 }
2184
2185 /* update the darwin background action state in the flags field for libproc */
2186 int
proc_get_darwinbgstate(task_t task,uint32_t * flagsp)2187 proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2188 {
2189 if (task->requested_policy.trp_ext_darwinbg) {
2190 *flagsp |= PROC_FLAG_EXT_DARWINBG;
2191 }
2192
2193 if (task->requested_policy.trp_int_darwinbg) {
2194 *flagsp |= PROC_FLAG_DARWINBG;
2195 }
2196
2197 #if !defined(XNU_TARGET_OS_OSX)
2198 if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
2199 *flagsp |= PROC_FLAG_IOS_APPLEDAEMON;
2200 }
2201
2202 if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2203 *flagsp |= PROC_FLAG_IOS_IMPPROMOTION;
2204 }
2205 #endif /* !defined(XNU_TARGET_OS_OSX) */
2206
2207 if (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_DEFAULT ||
2208 task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) {
2209 *flagsp |= PROC_FLAG_APPLICATION;
2210 }
2211
2212 if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2213 *flagsp |= PROC_FLAG_ADAPTIVE;
2214 }
2215
2216 if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
2217 task->requested_policy.trp_boosted == 1) {
2218 *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
2219 }
2220
2221 if (task_is_importance_donor(task)) {
2222 *flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
2223 }
2224
2225 if (task->effective_policy.tep_sup_active) {
2226 *flagsp |= PROC_FLAG_SUPPRESSED;
2227 }
2228
2229 return 0;
2230 }
2231
2232 /*
2233 * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2234 * The current scheme packs as much data into a single tracepoint as it can.
2235 *
2236 * Each task/thread requested/effective structure is 64 bits in size. Any
2237 * given tracepoint will emit either requested or effective data, but not both.
2238 *
2239 * A tracepoint may emit any of task, thread, or task & thread data.
2240 *
2241 * The type of data emitted varies with pointer size. Where possible, both
2242 * task and thread data are emitted. In LP32 systems, the first and second
2243 * halves of either the task or thread data is emitted.
2244 *
2245 * The code uses uintptr_t array indexes instead of high/low to avoid
2246 * confusion WRT big vs little endian.
2247 *
2248 * The truth table for the tracepoint data functions is below, and has the
2249 * following invariants:
2250 *
2251 * 1) task and thread are uintptr_t*
2252 * 2) task may never be NULL
2253 *
2254 *
2255 * LP32 LP64
2256 * trequested_0(task, NULL) task[0] task[0]
2257 * trequested_1(task, NULL) task[1] NULL
2258 * trequested_0(task, thread) thread[0] task[0]
2259 * trequested_1(task, thread) thread[1] thread[0]
2260 *
2261 * Basically, you get a full task or thread on LP32, and both on LP64.
2262 *
2263 * The uintptr_t munging here is squicky enough to deserve a comment.
2264 *
2265 * The variables we are accessing are laid out in memory like this:
2266 *
2267 * [ LP64 uintptr_t 0 ]
2268 * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
2269 *
2270 * 1 2 3 4 5 6 7 8
2271 *
2272 */
2273
2274 static uintptr_t
trequested_0(task_t task)2275 trequested_0(task_t task)
2276 {
2277 static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2278
2279 uintptr_t* raw = (uintptr_t*)&task->requested_policy;
2280
2281 return raw[0];
2282 }
2283
2284 static uintptr_t
trequested_1(task_t task)2285 trequested_1(task_t task)
2286 {
2287 #if defined __LP64__
2288 (void)task;
2289 return 0;
2290 #else
2291 uintptr_t* raw = (uintptr_t*)(&task->requested_policy);
2292 return raw[1];
2293 #endif
2294 }
2295
2296 static uintptr_t
teffective_0(task_t task)2297 teffective_0(task_t task)
2298 {
2299 uintptr_t* raw = (uintptr_t*)&task->effective_policy;
2300
2301 return raw[0];
2302 }
2303
2304 static uintptr_t
teffective_1(task_t task)2305 teffective_1(task_t task)
2306 {
2307 #if defined __LP64__
2308 (void)task;
2309 return 0;
2310 #else
2311 uintptr_t* raw = (uintptr_t*)(&task->effective_policy);
2312 return raw[1];
2313 #endif
2314 }
2315
2316 /* dump pending for tracepoint */
2317 uint32_t
tpending(task_pend_token_t pend_token)2318 tpending(task_pend_token_t pend_token)
2319 {
2320 return *(uint32_t*)(void*)(pend_token);
2321 }
2322
/*
 * Pack the task's requested policy into the POLICY_REQ_* tracepoint bit
 * layout.  Zero-valued fields contribute no bits, so a zero result means
 * "all defaults requested".
 */
uint64_t
task_requested_bitfield(task_t task)
{
	uint64_t bits = 0;
	/* snapshot by value; no lock is taken here */
	struct task_requested_policy requested = task->requested_policy;

	/* darwinbg, IO tier, passive IO and termination state */
	bits |= (requested.trp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
	bits |= (requested.trp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
	bits |= (requested.trp_int_iotier ? (((uint64_t)requested.trp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.trp_ext_iotier ? (((uint64_t)requested.trp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.trp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.trp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
	bits |= (requested.trp_bg_iotier ? (((uint64_t)requested.trp_bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0);
	bits |= (requested.trp_terminated ? POLICY_REQ_TERMINATED : 0);

	/* apptype, role and boost state */
	bits |= (requested.trp_boosted ? POLICY_REQ_BOOSTED : 0);
	bits |= (requested.trp_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0);
	bits |= (requested.trp_apptype ? (((uint64_t)requested.trp_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0);
	bits |= (requested.trp_role ? (((uint64_t)requested.trp_role) << POLICY_REQ_ROLE_SHIFT) : 0);

	/* suppression (app nap) requests */
	bits |= (requested.trp_sup_active ? POLICY_REQ_SUP_ACTIVE : 0);
	bits |= (requested.trp_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0);
	bits |= (requested.trp_sup_cpu ? POLICY_REQ_SUP_CPU : 0);
	bits |= (requested.trp_sup_timer ? (((uint64_t)requested.trp_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
	bits |= (requested.trp_sup_throughput ? (((uint64_t)requested.trp_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0);
	bits |= (requested.trp_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0);
	bits |= (requested.trp_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : 0);

	/* latency/throughput QoS, SFI, and the QoS clamp */
	bits |= (requested.trp_base_latency_qos ? (((uint64_t)requested.trp_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.trp_over_latency_qos ? (((uint64_t)requested.trp_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.trp_base_through_qos ? (((uint64_t)requested.trp_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
	bits |= (requested.trp_over_through_qos ? (((uint64_t)requested.trp_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);
	bits |= (requested.trp_sfi_managed ? POLICY_REQ_SFI_MANAGED : 0);
	bits |= (requested.trp_qos_clamp ? (((uint64_t)requested.trp_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : 0);

	return bits;
}
2360
/*
 * Pack the task's effective policy into the POLICY_EFF_* tracepoint bit
 * layout.  Zero-valued fields contribute no bits.
 */
uint64_t
task_effective_bitfield(task_t task)
{
	uint64_t bits = 0;
	/* snapshot by value; no lock is taken here */
	struct task_effective_policy effective = task->effective_policy;

	/* IO tier, darwinbg, socket-bg and termination state */
	bits |= (effective.tep_io_tier ? (((uint64_t)effective.tep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.tep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
	bits |= (effective.tep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
	bits |= (effective.tep_lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : 0);
	bits |= (effective.tep_terminated ? POLICY_EFF_TERMINATED : 0);
	bits |= (effective.tep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.tep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
	bits |= (effective.tep_bg_iotier ? (((uint64_t)effective.tep_bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);
	bits |= (effective.tep_qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0);

	/* TAL/suppression engagement, role, QoS and SFI state */
	bits |= (effective.tep_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0);
	bits |= (effective.tep_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0);
	bits |= (effective.tep_sup_active ? POLICY_EFF_SUP_ACTIVE : 0);
	bits |= (effective.tep_suppressed_cpu ? POLICY_EFF_SUP_CPU : 0);
	bits |= (effective.tep_role ? (((uint64_t)effective.tep_role) << POLICY_EFF_ROLE_SHIFT) : 0);
	bits |= (effective.tep_latency_qos ? (((uint64_t)effective.tep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.tep_through_qos ? (((uint64_t)effective.tep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
	bits |= (effective.tep_sfi_managed ? POLICY_EFF_SFI_MANAGED : 0);
	bits |= (effective.tep_qos_ceiling ? (((uint64_t)effective.tep_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0);

	return bits;
}
2389
2390
2391 /*
2392 * Resource usage and CPU related routines
2393 */
2394
2395 int
proc_get_task_ruse_cpu(task_t task,uint32_t * policyp,uint8_t * percentagep,uint64_t * intervalp,uint64_t * deadlinep)2396 proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
2397 {
2398 int error = 0;
2399 int scope;
2400
2401 task_lock(task);
2402
2403
2404 error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
2405 task_unlock(task);
2406
2407 /*
2408 * Reverse-map from CPU resource limit scopes back to policies (see comment below).
2409 */
2410 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2411 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
2412 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2413 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
2414 } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
2415 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2416 }
2417
2418 return error;
2419 }
2420
/*
 * Configure the default CPU usage monitor parameters.
 *
 * For tasks which have this mechanism activated: if any thread in the
 * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
 *
 * Called once at boot; fills in proc_max_cpumon_percentage and
 * proc_max_cpumon_interval, and consumes the (temporary) suppression
 * flags boot-arg.
 */
void
proc_init_cpumon_params(void)
{
	/*
	 * The max CPU percentage can be configured via the boot-args and
	 * a key in the device tree. The boot-args are honored first, then the
	 * device tree.
	 */
	if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
	    sizeof(proc_max_cpumon_percentage))) {
		uint64_t max_percentage = 0ULL;

		if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
		    sizeof(max_percentage))) {
			max_percentage = DEFAULT_CPUMON_PERCENTAGE;
		}

		assert(max_percentage <= UINT8_MAX);
		proc_max_cpumon_percentage = (uint8_t) max_percentage;
	}

	/* clamp to a sane percentage regardless of source */
	if (proc_max_cpumon_percentage > 100) {
		proc_max_cpumon_percentage = 100;
	}

	/*
	 * The interval should be specified in seconds.
	 *
	 * Like the max CPU percentage, the max CPU interval can be configured
	 * via boot-args and the device tree.
	 */
	if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
	    sizeof(proc_max_cpumon_interval))) {
		if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
		    sizeof(proc_max_cpumon_interval))) {
			proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
		}
	}

	/* convert the configured interval from seconds to nanoseconds */
	proc_max_cpumon_interval *= NSEC_PER_SEC;

	/* TEMPORARY boot arg to control App suppression */
	PE_parse_boot_argn("task_policy_suppression_flags",
	    &task_policy_suppression_flags,
	    sizeof(task_policy_suppression_flags));

	/* adjust suppression disk policy if called for in boot arg */
	if (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_IOTIER2) {
		proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER2;
	}
}
2478
2479 /*
2480 * Currently supported configurations for CPU limits.
2481 *
2482 * Policy | Deadline-based CPU limit | Percentage-based CPU limit
2483 * -------------------------------------+--------------------------+------------------------------
2484 * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only
2485 * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP
2486 * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP
2487 * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP
2488 * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only
2489 *
2490 * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
2491 * after the specified amount of wallclock time has elapsed.
2492 *
2493 * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
2494 * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
2495 * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
2496 * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
2497 *
2498 * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
2499 * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
2500 * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
2501 * but the potential consumer of the API at the time was insisting on wallclock time instead.
2502 *
2503 * Currently, requesting notification via an exception is the only way to get per-thread scope for a
2504 * CPU limit. All other types of notifications force task-wide scope for the limit.
2505 */
/*
 * Validate and install a CPU resource-usage policy on the task.
 * See the configuration matrix in the comment above for which
 * policy / percentage / deadline combinations are supported.
 * Returns 0, ENOTSUP, EINVAL, or an error from task_set_cpuusage().
 */
int
proc_set_task_ruse_cpu(task_t task, uint16_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
    int cpumon_entitled)
{
	int error = 0;
	int scope;

	/*
	 * Enforce the matrix of supported configurations for policy, percentage, and deadline.
	 */
	switch (policy) {
	// If no policy is explicitly given, the default is to throttle.
	case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
	case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
		if (deadline != 0) {
			return ENOTSUP;
		}
		scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
		break;
	case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
	case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
		if (percentage != 0) {
			return ENOTSUP;
		}
		scope = TASK_RUSECPU_FLAGS_DEADLINE;
		break;
	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
		if (deadline != 0) {
			return ENOTSUP;
		}
		scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
#ifdef CONFIG_NOMONITORS
		/* Monitors compiled out: accept the request but install nothing. */
		return error;
#endif /* CONFIG_NOMONITORS */
		break;
	default:
		return EINVAL;
	}

	/* record the policy on the task, then install the limit under the lock */
	task_lock(task);
	if (task != current_task()) {
		/* another task is imposing the limit: record it in the external slot */
		task->policy_ru_cpu_ext = policy;
	} else {
		task->policy_ru_cpu = policy;
	}
	error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
	task_unlock(task);
	return error;
}
2556
/* TODO: get rid of these */
/* Resource-usage classes passed to the BSD resource-action hooks below. */
#define TASK_POLICY_CPU_RESOURCE_USAGE 0
#define TASK_POLICY_WIREDMEM_RESOURCE_USAGE 1
#define TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE 2
#define TASK_POLICY_DISK_RESOURCE_USAGE 3
#define TASK_POLICY_NETWORK_RESOURCE_USAGE 4
#define TASK_POLICY_POWER_RESOURCE_USAGE 5

#define TASK_POLICY_RESOURCE_USAGE_COUNT 6
2566
/*
 * Reset the task's CPU usage policy to the default and tear down any
 * active CPU limit.  If a resource action had already been applied,
 * notify the BSD side so it can restore the process state.
 *
 * Returns 0 on success, or the error from task_clear_cpuusage_locked().
 */
int
proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
{
	int error = 0;
	int action;
	void * bsdinfo = NULL;

	task_lock(task);
	/* clearing from another task resets the external slot; self resets its own */
	if (task != current_task()) {
		task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
	} else {
		task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
	}

	error = task_clear_cpuusage_locked(task, cpumon_entitled);
	if (error != 0) {
		goto out;
	}

	/*
	 * NOTE(review): 'action' is read from applied_ru_cpu, but the reset
	 * below clears applied_ru_cpu_ext — confirm this asymmetry is intended.
	 */
	action = task->applied_ru_cpu;
	if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
		/* reset action */
		task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
	}
	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
		/* drop the lock before calling out to BSD to restore the action */
		bsdinfo = get_bsdtask_info(task);
		task_unlock(task);
		proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
		goto out1;
	}

out:
	task_unlock(task);
out1:
	return error;
}
2603
2604 /* used to apply resource limit related actions */
2605 static int
task_apply_resource_actions(task_t task,int type)2606 task_apply_resource_actions(task_t task, int type)
2607 {
2608 int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2609 void * bsdinfo = NULL;
2610
2611 switch (type) {
2612 case TASK_POLICY_CPU_RESOURCE_USAGE:
2613 break;
2614 case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
2615 case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
2616 case TASK_POLICY_DISK_RESOURCE_USAGE:
2617 case TASK_POLICY_NETWORK_RESOURCE_USAGE:
2618 case TASK_POLICY_POWER_RESOURCE_USAGE:
2619 return 0;
2620
2621 default:
2622 return 1;
2623 }
2624 ;
2625
2626 /* only cpu actions for now */
2627 task_lock(task);
2628
2629 if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2630 /* apply action */
2631 task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
2632 action = task->applied_ru_cpu_ext;
2633 } else {
2634 action = task->applied_ru_cpu_ext;
2635 }
2636
2637 if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2638 bsdinfo = get_bsdtask_info(task);
2639 task_unlock(task);
2640 proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2641 } else {
2642 task_unlock(task);
2643 }
2644
2645 return 0;
2646 }
2647
2648 /*
2649 * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
2650 * only allows for one at a time. This means that if there is a per-thread limit active, the other
2651 * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
2652 * to the caller, and prefer that, but there's no need for that at the moment.
2653 */
2654 static int
task_get_cpuusage(task_t task,uint8_t * percentagep,uint64_t * intervalp,uint64_t * deadlinep,int * scope)2655 task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
2656 {
2657 *percentagep = 0;
2658 *intervalp = 0;
2659 *deadlinep = 0;
2660
2661 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
2662 *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2663 *percentagep = task->rusage_cpu_perthr_percentage;
2664 *intervalp = task->rusage_cpu_perthr_interval;
2665 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
2666 *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2667 *percentagep = task->rusage_cpu_percentage;
2668 *intervalp = task->rusage_cpu_interval;
2669 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
2670 *scope = TASK_RUSECPU_FLAGS_DEADLINE;
2671 *deadlinep = task->rusage_cpu_deadline;
2672 } else {
2673 *scope = 0;
2674 }
2675
2676 return 0;
2677 }
2678
2679 /*
2680 * Suspend the CPU usage monitor for the task. Return value indicates
2681 * if the mechanism was actually enabled.
2682 */
int
task_suspend_cpumon(task_t task)
{
	thread_t thread;

	/* caller must hold the task lock */
	task_lock_assert_owned(task);

	/* nothing to suspend if the per-thread limit was never enabled */
	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
		return KERN_INVALID_ARGUMENT;
	}

#if CONFIG_TELEMETRY
	/*
	 * Disable task-wide telemetry if it was ever enabled by the CPU usage
	 * monitor's warning zone.
	 */
	telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
#endif

	/*
	 * Suspend monitoring for the task, and propagate that change to each thread.
	 */
	task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* force each thread through the ledger AST so it drops its limit */
		act_set_astledger(thread);
	}

	return KERN_SUCCESS;
}
2712
2713 /*
2714 * Remove all traces of the CPU monitor.
2715 */
2716 int
task_disable_cpumon(task_t task)2717 task_disable_cpumon(task_t task)
2718 {
2719 int kret;
2720
2721 task_lock_assert_owned(task);
2722
2723 kret = task_suspend_cpumon(task);
2724 if (kret) {
2725 return kret;
2726 }
2727
2728 /* Once we clear these values, the monitor can't be resumed */
2729 task->rusage_cpu_perthr_percentage = 0;
2730 task->rusage_cpu_perthr_interval = 0;
2731
2732 return KERN_SUCCESS;
2733 }
2734
2735
2736 static int
task_enable_cpumon_locked(task_t task)2737 task_enable_cpumon_locked(task_t task)
2738 {
2739 thread_t thread;
2740 task_lock_assert_owned(task);
2741
2742 if (task->rusage_cpu_perthr_percentage == 0 ||
2743 task->rusage_cpu_perthr_interval == 0) {
2744 return KERN_INVALID_ARGUMENT;
2745 }
2746
2747 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2748 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2749 act_set_astledger(thread);
2750 }
2751
2752 return KERN_SUCCESS;
2753 }
2754
2755 int
task_resume_cpumon(task_t task)2756 task_resume_cpumon(task_t task)
2757 {
2758 kern_return_t kret;
2759
2760 if (!task) {
2761 return EINVAL;
2762 }
2763
2764 task_lock(task);
2765 kret = task_enable_cpumon_locked(task);
2766 task_unlock(task);
2767
2768 return kret;
2769 }
2770
2771
/* duplicate values from bsd/sys/process_policy.h */
/* Sentinel percentages: 0xFF disables the monitor (entitlement required),
 * 0xFE resets it to the system defaults — see task_set_cpuusage(). */
#define PROC_POLICY_CPUMON_DISABLE 0xFF
#define PROC_POLICY_CPUMON_DEFAULTS 0xFE
2775
/*
 * Install a CPU usage limit on the task, in one of three scopes:
 *
 *  - TASK_RUSECPU_FLAGS_PERTHR_LIMIT: per-thread percentage limit enforced
 *    via the CPU usage monitor.
 *  - TASK_RUSECPU_FLAGS_PROC_LIMIT: task-wide percentage limit enforced by
 *    blocking on the cpu_time ledger.
 *  - TASK_RUSECPU_FLAGS_DEADLINE: a wallclock callout that runs
 *    task_action_cpuusage() after 'deadline' nanoseconds.
 *
 * 'percentage' may also carry the PROC_POLICY_CPUMON_* sentinels.  Called
 * and returns with the task lock held; the lock is dropped briefly if an
 * existing deadline callout must be cancelled.  Always returns 0.
 */
static int
task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
{
	uint64_t abstime = 0;
	uint64_t limittime = 0;

	lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);

	/* By default, refill once per second */
	if (interval == 0) {
		interval = NSEC_PER_SEC;
	}

	if (percentage != 0) {
		if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
			boolean_t warn = FALSE;

			/*
			 * A per-thread CPU limit on a task generates an exception
			 * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
			 * exceeds the limit.
			 */

			if (percentage == PROC_POLICY_CPUMON_DISABLE) {
				if (cpumon_entitled) {
					/* 25095698 - task_disable_cpumon() should be reliable */
					task_disable_cpumon(task);
					return 0;
				}

				/*
				 * This task wishes to disable the CPU usage monitor, but it's
				 * missing the required entitlement:
				 *     com.apple.private.kernel.override-cpumon
				 *
				 * Instead, treat this as a request to reset its params
				 * back to the defaults.
				 */
				warn = TRUE;
				percentage = PROC_POLICY_CPUMON_DEFAULTS;
			}

			if (percentage == PROC_POLICY_CPUMON_DEFAULTS) {
				percentage = proc_max_cpumon_percentage;
				interval = proc_max_cpumon_interval;
			}

			if (percentage > 100) {
				percentage = 100;
			}

			/*
			 * Passing in an interval of -1 means either:
			 * - Leave the interval as-is, if there's already a per-thread
			 *   limit configured
			 * - Use the system default.
			 */
			if (interval == -1ULL) {
				if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
					interval = task->rusage_cpu_perthr_interval;
				} else {
					interval = proc_max_cpumon_interval;
				}
			}

			/*
			 * Enforce global caps on CPU usage monitor here if the process is not
			 * entitled to escape the global caps.
			 */
			if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
				warn = TRUE;
				percentage = proc_max_cpumon_percentage;
			}

			if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
				warn = TRUE;
				interval = proc_max_cpumon_interval;
			}

			if (warn) {
				int pid = 0;
				const char *procname = "unknown";

#ifdef MACH_BSD
				pid = proc_selfpid();
				void *cur_bsd_info = get_bsdtask_info(current_task());
				if (cur_bsd_info != NULL) {
					procname = proc_name_address(cur_bsd_info);
				}
#endif

				printf("process %s[%d] denied attempt to escape CPU monitor"
				    " (missing required entitlement).\n", procname, pid);
			}

			/* configure the limit values */
			task->rusage_cpu_perthr_percentage = percentage;
			task->rusage_cpu_perthr_interval = interval;

			/* and enable the CPU monitor */
			(void)task_enable_cpumon_locked(task);
		} else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
			/*
			 * Currently, a proc-wide CPU limit always blocks if the limit is
			 * exceeded (LEDGER_ACTION_BLOCK).
			 */
			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
			task->rusage_cpu_percentage = percentage;
			task->rusage_cpu_interval = interval;

			/* ledger limit is the CPU time allowed per refill period */
			limittime = (interval * percentage) / 100;
			nanoseconds_to_absolutetime(limittime, &abstime);

			ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
			ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
			ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
		}
	}

	if (deadline != 0) {
		assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);

		/* if already in use, cancel and wait for it to cleanout */
		if (task->rusage_cpu_callt != NULL) {
			/* must drop the task lock; the callout may be mid-flight */
			task_unlock(task);
			thread_call_cancel_wait(task->rusage_cpu_callt);
			task_lock(task);
		}
		if (task->rusage_cpu_callt == NULL) {
			task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
		}
		/* setup callout */
		if (task->rusage_cpu_callt != 0) {
			uint64_t save_abstime = 0;

			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
			task->rusage_cpu_deadline = deadline;

			nanoseconds_to_absolutetime(deadline, &abstime);
			save_abstime = abstime;
			clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
			thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
		}
	}

	return 0;
}
2923
2924 int
task_clear_cpuusage(task_t task,int cpumon_entitled)2925 task_clear_cpuusage(task_t task, int cpumon_entitled)
2926 {
2927 int retval = 0;
2928
2929 task_lock(task);
2930 retval = task_clear_cpuusage_locked(task, cpumon_entitled);
2931 task_unlock(task);
2932
2933 return retval;
2934 }
2935
/*
 * Clear the proc-wide CPU percentage limit, optionally disable the
 * per-thread CPU usage monitor (only if the caller is entitled), and
 * cancel any pending CPU-usage deadline callout.
 *
 * Called with the task lock held; the lock is dropped and re-taken
 * around thread_call cancellation.  Always returns 0.
 */
static int
task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
{
	thread_call_t savecallt;

	/* cancel percentage handling if set */
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
		/* lift the ledger-enforced CPU time cap back to "no limit" */
		ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
		task->rusage_cpu_percentage = 0;
		task->rusage_cpu_interval = 0;
	}

	/*
	 * Disable the CPU usage monitor.
	 */
	if (cpumon_entitled) {
		task_disable_cpumon(task);
	}

	/* cancel deadline handling if set */
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
		if (task->rusage_cpu_callt != 0) {
			/* detach the callout from the task before unlocking */
			savecallt = task->rusage_cpu_callt;
			task->rusage_cpu_callt = NULL;
			task->rusage_cpu_deadline = 0;
			/*
			 * NOTE(review): the task lock is dropped while waiting for
			 * the callout to cancel — presumably to avoid deadlocking
			 * against a concurrently-running callout; confirm against
			 * task_action_cpuusage's locking behavior.
			 */
			task_unlock(task);
			thread_call_cancel_wait(savecallt);
			thread_call_free(savecallt);
			task_lock(task);
		}
	}
	return 0;
}
2971
2972 /* called by ledger unit to enforce action due to resource usage criteria being met */
2973 static void
task_action_cpuusage(thread_call_param_t param0,__unused thread_call_param_t param1)2974 task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
2975 {
2976 task_t task = (task_t)param0;
2977 (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
2978 return;
2979 }
2980
2981
2982 /*
2983 * Routines for taskwatch and pidbind
2984 */
2985
2986 #if CONFIG_TASKWATCH
2987
2988 LCK_MTX_DECLARE_ATTR(task_watch_mtx, &task_lck_grp, &task_lck_attr);
2989
/* Acquire the global mutex guarding all task watcher lists. */
static void
task_watch_lock(void)
{
	lck_mtx_lock(&task_watch_mtx);
}
2995
/* Release the global mutex guarding all task watcher lists. */
static void
task_watch_unlock(void)
{
	lck_mtx_unlock(&task_watch_mtx);
}
3001
/*
 * Append a watcher to the task's watcher queue and bump the count.
 * Caller must hold the task watch lock.
 */
static void
add_taskwatch_locked(task_t task, task_watch_t * twp)
{
	queue_enter(&task->task_watchers, twp, task_watch_t *, tw_links);
	task->num_taskwatchers++;
}
3008
/*
 * Unlink a watcher from the task's watcher queue and drop the count.
 * Caller must hold the task watch lock.
 */
static void
remove_taskwatch_locked(task_t task, task_watch_t * twp)
{
	queue_remove(&task->task_watchers, twp, task_watch_t *, tw_links);
	task->num_taskwatchers--;
}
3015
3016
/*
 * Bind (bind != 0) or unbind the thread identified by tid in curtask
 * to/from target_task for taskwatch tracking: while bound, the thread's
 * app-background state follows the watched task's WATCHERS_BG policy.
 *
 * Returns 0 on success (including unbinding a thread that was never
 * bound), ESRCH if the thread is not found or the target task is
 * inactive, EBUSY if the thread is already bound to a task.
 */
int
proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind)
{
	thread_t target_thread = NULL;
	int ret = 0, setbg = 0;
	task_watch_t *twp = NULL;
	task_t task = TASK_NULL;

	target_thread = task_findtid(curtask, tid);
	if (target_thread == NULL) {
		return ESRCH;
	}
	/* holds thread reference */

	if (bind != 0) {
		/* task is still active ? */
		task_lock(target_task);
		if (target_task->active == 0) {
			task_unlock(target_task);
			ret = ESRCH;
			goto out;
		}
		task_unlock(target_task);

		/* allocate the watcher record before taking the watch lock */
		twp = kalloc_type(task_watch_t, Z_WAITOK | Z_ZERO | Z_NOFAIL);

		task_watch_lock();

		if (target_thread->taskwatch != NULL) {
			/* already bound to another task */
			task_watch_unlock();

			kfree_type(task_watch_t, twp);
			ret = EBUSY;
			goto out;
		}

		task_reference(target_task);

		/* current background state of the watched task */
		setbg = proc_get_effective_task_policy(target_task, TASK_POLICY_WATCHERS_BG);

		twp->tw_task = target_task;             /* holds the task reference */
		twp->tw_thread = target_thread;         /* holds the thread reference */
		twp->tw_state = setbg;
		twp->tw_importance = target_thread->importance;

		add_taskwatch_locked(target_task, twp);

		target_thread->taskwatch = twp;

		task_watch_unlock();

		/* apply background state immediately if the watched task is BG */
		if (setbg) {
			set_thread_appbg(target_thread, setbg, INT_MIN);
		}

		/* retain the thread reference as it is in twp */
		target_thread = NULL;
	} else {
		/* unbind */
		task_watch_lock();
		if ((twp = target_thread->taskwatch) != NULL) {
			task = twp->tw_task;
			target_thread->taskwatch = NULL;
			remove_taskwatch_locked(task, twp);

			task_watch_unlock();

			/* restore importance, then drop the references held in twp */
			task_deallocate(task);                  /* drop task ref in twp */
			set_thread_appbg(target_thread, 0, twp->tw_importance);
			thread_deallocate(target_thread);       /* drop thread ref in twp */
			kfree_type(task_watch_t, twp);
		} else {
			task_watch_unlock();
			ret = 0;           /* return success if it not already bound */
			goto out;
		}
	}
out:
	thread_deallocate(target_thread);       /* drop thread ref acquired in this routine */
	return ret;
}
3099
3100 static void
set_thread_appbg(thread_t thread,int setbg,__unused int importance)3101 set_thread_appbg(thread_t thread, int setbg, __unused int importance)
3102 {
3103 int enable = (setbg ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE);
3104
3105 proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_PIDBIND_BG, enable);
3106 }
3107
3108 static void
apply_appstate_watchers(task_t task)3109 apply_appstate_watchers(task_t task)
3110 {
3111 int numwatchers = 0, i, j, setbg;
3112 thread_watchlist_t * threadlist;
3113 task_watch_t * twp;
3114
3115 retry:
3116 /* if no watchers on the list return */
3117 if ((numwatchers = task->num_taskwatchers) == 0) {
3118 return;
3119 }
3120
3121 threadlist = kalloc_type(thread_watchlist_t, numwatchers, Z_WAITOK | Z_ZERO);
3122 if (threadlist == NULL) {
3123 return;
3124 }
3125
3126 task_watch_lock();
3127 /*serialize application of app state changes */
3128
3129 if (task->watchapplying != 0) {
3130 lck_mtx_sleep(&task_watch_mtx, LCK_SLEEP_DEFAULT, &task->watchapplying, THREAD_UNINT);
3131 task_watch_unlock();
3132 kfree_type(thread_watchlist_t, numwatchers, threadlist);
3133 goto retry;
3134 }
3135
3136 if (numwatchers != task->num_taskwatchers) {
3137 task_watch_unlock();
3138 kfree_type(thread_watchlist_t, numwatchers, threadlist);
3139 goto retry;
3140 }
3141
3142 setbg = proc_get_effective_task_policy(task, TASK_POLICY_WATCHERS_BG);
3143
3144 task->watchapplying = 1;
3145 i = 0;
3146 queue_iterate(&task->task_watchers, twp, task_watch_t *, tw_links) {
3147 threadlist[i].thread = twp->tw_thread;
3148 thread_reference(threadlist[i].thread);
3149 if (setbg != 0) {
3150 twp->tw_importance = twp->tw_thread->importance;
3151 threadlist[i].importance = INT_MIN;
3152 } else {
3153 threadlist[i].importance = twp->tw_importance;
3154 }
3155 i++;
3156 if (i > numwatchers) {
3157 break;
3158 }
3159 }
3160
3161 task_watch_unlock();
3162
3163 for (j = 0; j < i; j++) {
3164 set_thread_appbg(threadlist[j].thread, setbg, threadlist[j].importance);
3165 thread_deallocate(threadlist[j].thread);
3166 }
3167 kfree_type(thread_watchlist_t, numwatchers, threadlist);
3168
3169
3170 task_watch_lock();
3171 task->watchapplying = 0;
3172 thread_wakeup_one(&task->watchapplying);
3173 task_watch_unlock();
3174 }
3175
/*
 * Detach this thread from any task it is watching (called on thread
 * teardown; note the long-standing "thead" typo in the exported name).
 * Drops the thread/task references held by the watcher record and
 * restores the thread's saved importance.
 */
void
thead_remove_taskwatch(thread_t thread)
{
	task_watch_t * twp;
	int importance = 0;

	task_watch_lock();
	if ((twp = thread->taskwatch) != NULL) {
		thread->taskwatch = NULL;
		remove_taskwatch_locked(twp->tw_task, twp);
	}
	task_watch_unlock();
	if (twp != NULL) {
		/* drop the references the watcher record was holding */
		thread_deallocate(twp->tw_thread);
		task_deallocate(twp->tw_task);
		importance = twp->tw_importance;
		kfree_type(task_watch_t, twp);
		/* remove the thread and networkbg */
		set_thread_appbg(thread, 0, importance);
	}
}
3197
/*
 * Tear down all watchers of a dying task.  The watcher queue is moved
 * to a local head under the watch lock, then each record is unwound
 * (un-background the thread, drop its references) outside the lock.
 */
void
task_removewatchers(task_t task)
{
	queue_head_t queue;
	task_watch_t *twp;

	task_watch_lock();
	/* steal the whole watcher list onto a local queue head */
	queue_new_head(&task->task_watchers, &queue, task_watch_t *, tw_links);
	queue_init(&task->task_watchers);

	queue_iterate(&queue, twp, task_watch_t *, tw_links) {
		/*
		 * Since the linkage is removed and thread state cleanup is already set up,
		 * remove the reference from the thread.
		 */
		twp->tw_thread->taskwatch = NULL;       /* removed linkage, clear thread holding ref */
	}

	task->num_taskwatchers = 0;
	task_watch_unlock();

	while (!queue_empty(&queue)) {
		queue_remove_first(&queue, twp, task_watch_t *, tw_links);
		/* remove thread and network bg */
		set_thread_appbg(twp->tw_thread, 0, twp->tw_importance);
		thread_deallocate(twp->tw_thread);
		task_deallocate(twp->tw_task);
		kfree_type(task_watch_t, twp);
	}
}
3228 #endif /* CONFIG_TASKWATCH */
3229
3230 /*
3231 * Routines for importance donation/inheritance/boosting
3232 */
3233
3234 static void
task_importance_update_live_donor(task_t target_task)3235 task_importance_update_live_donor(task_t target_task)
3236 {
3237 #if IMPORTANCE_INHERITANCE
3238
3239 ipc_importance_task_t task_imp;
3240
3241 task_imp = ipc_importance_for_task(target_task, FALSE);
3242 if (IIT_NULL != task_imp) {
3243 ipc_importance_task_update_live_donor(task_imp);
3244 ipc_importance_task_release(task_imp);
3245 }
3246 #endif /* IMPORTANCE_INHERITANCE */
3247 }
3248
3249 void
task_importance_mark_donor(task_t task,boolean_t donating)3250 task_importance_mark_donor(task_t task, boolean_t donating)
3251 {
3252 #if IMPORTANCE_INHERITANCE
3253 ipc_importance_task_t task_imp;
3254
3255 task_imp = ipc_importance_for_task(task, FALSE);
3256 if (IIT_NULL != task_imp) {
3257 ipc_importance_task_mark_donor(task_imp, donating);
3258 ipc_importance_task_release(task_imp);
3259 }
3260 #endif /* IMPORTANCE_INHERITANCE */
3261 }
3262
3263 void
task_importance_mark_live_donor(task_t task,boolean_t live_donating)3264 task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3265 {
3266 #if IMPORTANCE_INHERITANCE
3267 ipc_importance_task_t task_imp;
3268
3269 task_imp = ipc_importance_for_task(task, FALSE);
3270 if (IIT_NULL != task_imp) {
3271 ipc_importance_task_mark_live_donor(task_imp, live_donating);
3272 ipc_importance_task_release(task_imp);
3273 }
3274 #endif /* IMPORTANCE_INHERITANCE */
3275 }
3276
3277 void
task_importance_mark_receiver(task_t task,boolean_t receiving)3278 task_importance_mark_receiver(task_t task, boolean_t receiving)
3279 {
3280 #if IMPORTANCE_INHERITANCE
3281 ipc_importance_task_t task_imp;
3282
3283 task_imp = ipc_importance_for_task(task, FALSE);
3284 if (IIT_NULL != task_imp) {
3285 ipc_importance_task_mark_receiver(task_imp, receiving);
3286 ipc_importance_task_release(task_imp);
3287 }
3288 #endif /* IMPORTANCE_INHERITANCE */
3289 }
3290
3291 void
task_importance_mark_denap_receiver(task_t task,boolean_t denap)3292 task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3293 {
3294 #if IMPORTANCE_INHERITANCE
3295 ipc_importance_task_t task_imp;
3296
3297 task_imp = ipc_importance_for_task(task, FALSE);
3298 if (IIT_NULL != task_imp) {
3299 ipc_importance_task_mark_denap_receiver(task_imp, denap);
3300 ipc_importance_task_release(task_imp);
3301 }
3302 #endif /* IMPORTANCE_INHERITANCE */
3303 }
3304
/*
 * Reset the task's importance assertions and recompute its live-donor
 * state.  Uses the task's base importance struct directly (no extra
 * reference is taken).
 */
void
task_importance_reset(__imp_only task_t task)
{
#if IMPORTANCE_INHERITANCE
	ipc_importance_task_t task_imp;

	/* TODO: Lower importance downstream before disconnect */
	task_imp = task->task_imp_base;
	ipc_importance_reset(task_imp, FALSE);
	task_importance_update_live_donor(task);
#endif /* IMPORTANCE_INHERITANCE */
}
3317
3318 void
task_importance_init_from_parent(__imp_only task_t new_task,__imp_only task_t parent_task)3319 task_importance_init_from_parent(__imp_only task_t new_task, __imp_only task_t parent_task)
3320 {
3321 #if IMPORTANCE_INHERITANCE
3322 ipc_importance_task_t new_task_imp = IIT_NULL;
3323
3324 new_task->task_imp_base = NULL;
3325 if (!parent_task) {
3326 return;
3327 }
3328
3329 if (task_is_marked_importance_donor(parent_task)) {
3330 new_task_imp = ipc_importance_for_task(new_task, FALSE);
3331 assert(IIT_NULL != new_task_imp);
3332 ipc_importance_task_mark_donor(new_task_imp, TRUE);
3333 }
3334 if (task_is_marked_live_importance_donor(parent_task)) {
3335 if (IIT_NULL == new_task_imp) {
3336 new_task_imp = ipc_importance_for_task(new_task, FALSE);
3337 }
3338 assert(IIT_NULL != new_task_imp);
3339 ipc_importance_task_mark_live_donor(new_task_imp, TRUE);
3340 }
3341 /* Do not inherit 'receiver' on fork, vfexec or true spawn */
3342 if (task_is_exec_copy(new_task) &&
3343 task_is_marked_importance_receiver(parent_task)) {
3344 if (IIT_NULL == new_task_imp) {
3345 new_task_imp = ipc_importance_for_task(new_task, FALSE);
3346 }
3347 assert(IIT_NULL != new_task_imp);
3348 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
3349 }
3350 if (task_is_marked_importance_denap_receiver(parent_task)) {
3351 if (IIT_NULL == new_task_imp) {
3352 new_task_imp = ipc_importance_for_task(new_task, FALSE);
3353 }
3354 assert(IIT_NULL != new_task_imp);
3355 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
3356 }
3357 if (IIT_NULL != new_task_imp) {
3358 assert(new_task->task_imp_base == new_task_imp);
3359 ipc_importance_task_release(new_task_imp);
3360 }
3361 #endif /* IMPORTANCE_INHERITANCE */
3362 }
3363
3364 #if IMPORTANCE_INHERITANCE
3365 /*
3366 * Sets the task boost bit to the provided value. Does NOT run the update function.
3367 *
3368 * Task lock must be held.
3369 */
static void
task_set_boost_locked(task_t task, boolean_t boost_active)
{
#if IMPORTANCE_TRACE
	/* emit start tracepoint with the requested policy words */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
	    proc_selfpid(), task_pid(task), trequested_0(task), trequested_1(task), 0);
#endif /* IMPORTANCE_TRACE */

	/* flip the requested boost bit; effective policy is NOT recomputed here */
	task->requested_policy.trp_boosted = boost_active;

#if IMPORTANCE_TRACE
	/* fire the matching dtrace probe and end tracepoint */
	if (boost_active == TRUE) {
		DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
	} else {
		DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
	}
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
	    proc_selfpid(), task_pid(task),
	    trequested_0(task), trequested_1(task), 0);
#endif /* IMPORTANCE_TRACE */
}
3391
3392 /*
3393 * Sets the task boost bit to the provided value and applies the update.
3394 *
3395 * Task lock must be held. Must call update complete after unlocking the task.
3396 */
void
task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
{
	/* flip the boost bit, then recompute effective policy under the lock */
	task_set_boost_locked(task, boost_active);

	task_policy_update_locked(task, pend_token);
}
3404
3405 /*
3406 * Check if this task should donate importance.
3407 *
3408 * May be called without taking the task lock. In that case, donor status can change
3409 * so you must check only once for each donation event.
3410 */
3411 boolean_t
task_is_importance_donor(task_t task)3412 task_is_importance_donor(task_t task)
3413 {
3414 if (task->task_imp_base == IIT_NULL) {
3415 return FALSE;
3416 }
3417 return ipc_importance_task_is_donor(task->task_imp_base);
3418 }
3419
3420 /*
3421 * Query the status of the task's donor mark.
3422 */
3423 boolean_t
task_is_marked_importance_donor(task_t task)3424 task_is_marked_importance_donor(task_t task)
3425 {
3426 if (task->task_imp_base == IIT_NULL) {
3427 return FALSE;
3428 }
3429 return ipc_importance_task_is_marked_donor(task->task_imp_base);
3430 }
3431
3432 /*
3433 * Query the status of the task's live donor and donor mark.
3434 */
3435 boolean_t
task_is_marked_live_importance_donor(task_t task)3436 task_is_marked_live_importance_donor(task_t task)
3437 {
3438 if (task->task_imp_base == IIT_NULL) {
3439 return FALSE;
3440 }
3441 return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
3442 }
3443
3444
3445 /*
3446 * This routine may be called without holding task lock
3447 * since the value of imp_receiver can never be unset.
3448 */
3449 boolean_t
task_is_importance_receiver(task_t task)3450 task_is_importance_receiver(task_t task)
3451 {
3452 if (task->task_imp_base == IIT_NULL) {
3453 return FALSE;
3454 }
3455 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3456 }
3457
3458 /*
3459 * Query the task's receiver mark.
3460 */
3461 boolean_t
task_is_marked_importance_receiver(task_t task)3462 task_is_marked_importance_receiver(task_t task)
3463 {
3464 if (task->task_imp_base == IIT_NULL) {
3465 return FALSE;
3466 }
3467 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3468 }
3469
3470 /*
3471 * This routine may be called without holding task lock
3472 * since the value of de-nap receiver can never be unset.
3473 */
3474 boolean_t
task_is_importance_denap_receiver(task_t task)3475 task_is_importance_denap_receiver(task_t task)
3476 {
3477 if (task->task_imp_base == IIT_NULL) {
3478 return FALSE;
3479 }
3480 return ipc_importance_task_is_denap_receiver(task->task_imp_base);
3481 }
3482
3483 /*
3484 * Query the task's de-nap receiver mark.
3485 */
3486 boolean_t
task_is_marked_importance_denap_receiver(task_t task)3487 task_is_marked_importance_denap_receiver(task_t task)
3488 {
3489 if (task->task_imp_base == IIT_NULL) {
3490 return FALSE;
3491 }
3492 return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
3493 }
3494
3495 /*
3496 * This routine may be called without holding task lock
3497 * since the value of imp_receiver can never be unset.
3498 */
3499 boolean_t
task_is_importance_receiver_type(task_t task)3500 task_is_importance_receiver_type(task_t task)
3501 {
3502 if (task->task_imp_base == IIT_NULL) {
3503 return FALSE;
3504 }
3505 return task_is_importance_receiver(task) ||
3506 task_is_importance_denap_receiver(task);
3507 }
3508
3509 /*
3510 * External importance assertions are managed by the process in userspace
3511 * Internal importance assertions are the responsibility of the kernel
3512 * Assertions are changed from internal to external via task_importance_externalize_assertion
3513 */
3514
3515 int
task_importance_hold_internal_assertion(task_t target_task,uint32_t count)3516 task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3517 {
3518 ipc_importance_task_t task_imp;
3519 kern_return_t ret;
3520
3521 /* may be first time, so allow for possible importance setup */
3522 task_imp = ipc_importance_for_task(target_task, FALSE);
3523 if (IIT_NULL == task_imp) {
3524 return EOVERFLOW;
3525 }
3526 ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3527 ipc_importance_task_release(task_imp);
3528
3529 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3530 }
3531
3532 int
task_importance_hold_file_lock_assertion(task_t target_task,uint32_t count)3533 task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3534 {
3535 ipc_importance_task_t task_imp;
3536 kern_return_t ret;
3537
3538 /* may be first time, so allow for possible importance setup */
3539 task_imp = ipc_importance_for_task(target_task, FALSE);
3540 if (IIT_NULL == task_imp) {
3541 return EOVERFLOW;
3542 }
3543 ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3544 ipc_importance_task_release(task_imp);
3545
3546 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3547 }
3548
3549 int
task_importance_hold_legacy_external_assertion(task_t target_task,uint32_t count)3550 task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
3551 {
3552 ipc_importance_task_t task_imp;
3553 kern_return_t ret;
3554
3555 /* must already have set up an importance */
3556 task_imp = target_task->task_imp_base;
3557 if (IIT_NULL == task_imp) {
3558 return EOVERFLOW;
3559 }
3560 ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3561 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3562 }
3563
3564 int
task_importance_drop_file_lock_assertion(task_t target_task,uint32_t count)3565 task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
3566 {
3567 ipc_importance_task_t task_imp;
3568 kern_return_t ret;
3569
3570 /* must already have set up an importance */
3571 task_imp = target_task->task_imp_base;
3572 if (IIT_NULL == task_imp) {
3573 return EOVERFLOW;
3574 }
3575 ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count);
3576 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3577 }
3578
3579 int
task_importance_drop_legacy_external_assertion(task_t target_task,uint32_t count)3580 task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3581 {
3582 ipc_importance_task_t task_imp;
3583 kern_return_t ret;
3584
3585 /* must already have set up an importance */
3586 task_imp = target_task->task_imp_base;
3587 if (IIT_NULL == task_imp) {
3588 return EOVERFLOW;
3589 }
3590 ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3591 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3592 }
3593
/*
 * Point a tempowner watchport at `task`, transferring any boosts the
 * port was forwarding to its previous owner back off that owner.
 * On return, *boostp holds the number of boosts the port carried
 * (0 if the port was invalid or not tempowner).
 */
static void
task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
{
	int boost = 0;

	__imptrace_only int released_pid = 0;
	__imptrace_only int pid = task_pid(task);

	ipc_importance_task_t release_imp_task = IIT_NULL;

	if (IP_VALID(port) != 0) {
		/* may create the task's importance struct on first use */
		ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);

		ip_mq_lock(port);

		/*
		 * The port must have been marked tempowner already.
		 * This also filters out ports whose receive rights
		 * are already enqueued in a message, as you can't
		 * change the right's destination once it's already
		 * on its way.
		 */
		if (port->ip_tempowner != 0) {
			assert(port->ip_impdonation != 0);

			boost = port->ip_impcount;
			if (IIT_NULL != ip_get_imp_task(port)) {
				/*
				 * if this port is already bound to a task,
				 * release the task reference and drop any
				 * watchport-forwarded boosts
				 */
				release_imp_task = ip_get_imp_task(port);
				port->ip_imp_task = IIT_NULL;
			}

			/* mark the port is watching another task (reference held in port->ip_imp_task) */
			if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
				port->ip_imp_task = new_imp_task;
				new_imp_task = IIT_NULL;
			}
		}
		ip_mq_unlock(port);

		/* drop our ref if the port did not consume it */
		if (IIT_NULL != new_imp_task) {
			ipc_importance_task_release(new_imp_task);
		}

		/* unwind the previous owner's boosts outside the port lock */
		if (IIT_NULL != release_imp_task) {
			if (boost > 0) {
				ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
			}

			// released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
			ipc_importance_task_release(release_imp_task);
		}
#if IMPORTANCE_TRACE
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
		    proc_selfpid(), pid, boost, released_pid, 0);
#endif /* IMPORTANCE_TRACE */
	}

	*boostp = boost;
	return;
}
3659
3660 #endif /* IMPORTANCE_INHERITANCE */
3661
3662 /*
3663 * Routines for VM to query task importance
3664 */
3665
3666
3667 /*
3668 * Order to be considered while estimating importance
3669 * for low memory notification and purging purgeable memory.
3670 */
3671 #define TASK_IMPORTANCE_FOREGROUND 4
3672 #define TASK_IMPORTANCE_NOTDARWINBG 1
3673
3674
3675 /*
3676 * (Un)Mark the task as a privileged listener for memory notifications.
3677 * if marked, this task will be among the first to be notified amongst
3678 * the bulk of all other tasks when the system enters a pressure level
3679 * of interest to this task.
3680 */
3681 int
task_low_mem_privileged_listener(task_t task,boolean_t new_value,boolean_t * old_value)3682 task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
3683 {
3684 if (old_value != NULL) {
3685 *old_value = (boolean_t)task->low_mem_privileged_listener;
3686 } else {
3687 task_lock(task);
3688 task->low_mem_privileged_listener = (uint32_t)new_value;
3689 task_unlock(task);
3690 }
3691
3692 return 0;
3693 }
3694
3695 /*
3696 * Checks if the task is already notified.
3697 *
3698 * Condition: task lock should be held while calling this function.
3699 */
3700 boolean_t
task_has_been_notified(task_t task,int pressurelevel)3701 task_has_been_notified(task_t task, int pressurelevel)
3702 {
3703 if (task == NULL) {
3704 return FALSE;
3705 }
3706
3707 if (pressurelevel == kVMPressureWarning) {
3708 return task->low_mem_notified_warn ? TRUE : FALSE;
3709 } else if (pressurelevel == kVMPressureCritical) {
3710 return task->low_mem_notified_critical ? TRUE : FALSE;
3711 } else {
3712 return TRUE;
3713 }
3714 }
3715
3716
3717 /*
3718 * Checks if the task is used for purging.
3719 *
3720 * Condition: task lock should be held while calling this function.
3721 */
3722 boolean_t
task_used_for_purging(task_t task,int pressurelevel)3723 task_used_for_purging(task_t task, int pressurelevel)
3724 {
3725 if (task == NULL) {
3726 return FALSE;
3727 }
3728
3729 if (pressurelevel == kVMPressureWarning) {
3730 return task->purged_memory_warn ? TRUE : FALSE;
3731 } else if (pressurelevel == kVMPressureCritical) {
3732 return task->purged_memory_critical ? TRUE : FALSE;
3733 } else {
3734 return TRUE;
3735 }
3736 }
3737
3738
3739 /*
3740 * Mark the task as notified with memory notification.
3741 *
3742 * Condition: task lock should be held while calling this function.
3743 */
3744 void
task_mark_has_been_notified(task_t task,int pressurelevel)3745 task_mark_has_been_notified(task_t task, int pressurelevel)
3746 {
3747 if (task == NULL) {
3748 return;
3749 }
3750
3751 if (pressurelevel == kVMPressureWarning) {
3752 task->low_mem_notified_warn = 1;
3753 } else if (pressurelevel == kVMPressureCritical) {
3754 task->low_mem_notified_critical = 1;
3755 }
3756 }
3757
3758
3759 /*
3760 * Mark the task as purged.
3761 *
3762 * Condition: task lock should be held while calling this function.
3763 */
3764 void
task_mark_used_for_purging(task_t task,int pressurelevel)3765 task_mark_used_for_purging(task_t task, int pressurelevel)
3766 {
3767 if (task == NULL) {
3768 return;
3769 }
3770
3771 if (pressurelevel == kVMPressureWarning) {
3772 task->purged_memory_warn = 1;
3773 } else if (pressurelevel == kVMPressureCritical) {
3774 task->purged_memory_critical = 1;
3775 }
3776 }
3777
3778
3779 /*
3780 * Mark the task eligible for low memory notification.
3781 *
3782 * Condition: task lock should be held while calling this function.
3783 */
3784 void
task_clear_has_been_notified(task_t task,int pressurelevel)3785 task_clear_has_been_notified(task_t task, int pressurelevel)
3786 {
3787 if (task == NULL) {
3788 return;
3789 }
3790
3791 if (pressurelevel == kVMPressureWarning) {
3792 task->low_mem_notified_warn = 0;
3793 } else if (pressurelevel == kVMPressureCritical) {
3794 task->low_mem_notified_critical = 0;
3795 }
3796 }
3797
3798
3799 /*
3800 * Mark the task eligible for purging its purgeable memory.
3801 *
3802 * Condition: task lock should be held while calling this function.
3803 */
3804 void
task_clear_used_for_purging(task_t task)3805 task_clear_used_for_purging(task_t task)
3806 {
3807 if (task == NULL) {
3808 return;
3809 }
3810
3811 task->purged_memory_warn = 0;
3812 task->purged_memory_critical = 0;
3813 }
3814
3815
3816 /*
3817 * Estimate task importance for purging its purgeable memory
3818 * and low memory notification.
3819 *
3820 * Importance is calculated in the following order of criteria:
3821 * -Task role : Background vs Foreground
3822 * -Boost status: Not boosted vs Boosted
3823 * -Darwin BG status.
3824 *
3825 * Returns: Estimated task importance. Less important task will have lower
3826 * estimated importance.
3827 */
3828 int
task_importance_estimate(task_t task)3829 task_importance_estimate(task_t task)
3830 {
3831 int task_importance = 0;
3832
3833 if (task == NULL) {
3834 return 0;
3835 }
3836
3837 if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) {
3838 task_importance += TASK_IMPORTANCE_FOREGROUND;
3839 }
3840
3841 if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0) {
3842 task_importance += TASK_IMPORTANCE_NOTDARWINBG;
3843 }
3844
3845 return task_importance;
3846 }
3847
3848 boolean_t
task_has_assertions(task_t task)3849 task_has_assertions(task_t task)
3850 {
3851 return task->task_imp_base->iit_assertcnt? TRUE : FALSE;
3852 }
3853
3854
/*
 * Report a resource-limit violation to the host's resource-notify port
 * via the MIG routine passed in `sendfunc`.  Gathers the violator's
 * name/pid (and, for fatal limits, its executable path), timestamps
 * the event, and sends the ledger details.  Returns KERN_SUCCESS or
 * the first error encountered; KERN_NOT_SUPPORTED on non-BSD builds.
 */
kern_return_t
send_resource_violation(typeof(send_cpu_usage_violation) sendfunc,
    task_t violator,
    struct ledger_entry_info *linfo,
    resource_notify_flags_t flags)
{
#ifndef MACH_BSD
	return KERN_NOT_SUPPORTED;
#else
	kern_return_t kr = KERN_SUCCESS;
	proc_t proc = NULL;
	posix_path_t proc_path = "";
	proc_name_t procname = "<unknown>";
	int pid = -1;
	clock_sec_t secs;
	clock_nsec_t nsecs;
	mach_timespec_t timestamp;
	thread_t curthread = current_thread();
	ipc_port_t dstport = MACH_PORT_NULL;

	if (!violator) {
		kr = KERN_INVALID_ARGUMENT; goto finish;
	}

	/* extract violator information */
	task_lock(violator);
	if (!(proc = get_bsdtask_info(violator))) {
		task_unlock(violator);
		kr = KERN_INVALID_ARGUMENT; goto finish;
	}
	(void)mig_strncpy(procname, proc_best_name(proc), sizeof(procname));
	pid = task_pid(violator);
	if (flags & kRNFatalLimitFlag) {
		/* fatal violations also report the full executable path */
		kr = proc_pidpathinfo_internal(proc, 0, proc_path,
		    sizeof(proc_path), NULL);
	}
	task_unlock(violator);
	if (kr) {
		goto finish;
	}

	/* violation time ~ now */
	clock_get_calendar_nanotime(&secs, &nsecs);
	timestamp.tv_sec = (int32_t)secs;
	timestamp.tv_nsec = (int32_t)nsecs;
	/* 25567702 tracks widening mach_timespec_t */

	/* send message */
	kr = host_get_special_port(host_priv_self(), HOST_LOCAL_NODE,
	    HOST_RESOURCE_NOTIFY_PORT, &dstport);
	if (kr) {
		goto finish;
	}

	/* allow the send to exceed the port's queue limit for this message */
	thread_set_honor_qlimit(curthread);
	kr = sendfunc(dstport,
	    procname, pid, proc_path, timestamp,
	    linfo->lei_balance, linfo->lei_last_refill,
	    linfo->lei_limit, linfo->lei_refill_period,
	    flags);
	thread_clear_honor_qlimit(curthread);

	ipc_port_release_send(dstport);

finish:
	return kr;
#endif /* MACH_BSD */
}
3923
3924 kern_return_t
send_resource_violation_with_fatal_port(typeof(send_port_space_violation) sendfunc,task_t violator,int64_t current_size,int64_t limit,mach_port_t fatal_port,resource_notify_flags_t flags)3925 send_resource_violation_with_fatal_port(typeof(send_port_space_violation) sendfunc,
3926 task_t violator,
3927 int64_t current_size,
3928 int64_t limit,
3929 mach_port_t fatal_port,
3930 resource_notify_flags_t flags)
3931 {
3932 #ifndef MACH_BSD
3933 kr = KERN_NOT_SUPPORTED; goto finish;
3934 #else
3935 kern_return_t kr = KERN_SUCCESS;
3936 proc_t proc = NULL;
3937 proc_name_t procname = "<unknown>";
3938 int pid = -1;
3939 clock_sec_t secs;
3940 clock_nsec_t nsecs;
3941 mach_timespec_t timestamp;
3942 thread_t curthread = current_thread();
3943 ipc_port_t dstport = MACH_PORT_NULL;
3944
3945 if (!violator) {
3946 kr = KERN_INVALID_ARGUMENT; goto finish;
3947 }
3948
3949 /* extract violator information; no need to acquire task lock */
3950 assert(violator == current_task());
3951 if (!(proc = get_bsdtask_info(violator))) {
3952 kr = KERN_INVALID_ARGUMENT; goto finish;
3953 }
3954 (void)mig_strncpy(procname, proc_best_name(proc), sizeof(procname));
3955 pid = task_pid(violator);
3956
3957 /* violation time ~ now */
3958 clock_get_calendar_nanotime(&secs, &nsecs);
3959 timestamp.tv_sec = (int32_t)secs;
3960 timestamp.tv_nsec = (int32_t)nsecs;
3961 /* 25567702 tracks widening mach_timespec_t */
3962
3963 /* send message */
3964 kr = task_get_special_port(current_task(), TASK_RESOURCE_NOTIFY_PORT, &dstport);
3965 if (dstport == MACH_PORT_NULL) {
3966 kr = host_get_special_port(host_priv_self(), HOST_LOCAL_NODE,
3967 HOST_RESOURCE_NOTIFY_PORT, &dstport);
3968 if (kr) {
3969 goto finish;
3970 }
3971 }
3972
3973 thread_set_honor_qlimit(curthread);
3974 kr = sendfunc(dstport,
3975 procname, pid, timestamp,
3976 current_size, limit, fatal_port,
3977 flags);
3978 thread_clear_honor_qlimit(curthread);
3979
3980 ipc_port_release_send(dstport);
3981
3982 #endif /* MACH_BSD */
3983 finish:
3984 return kr;
3985 }
3986
3987 /*
3988 * Resource violations trace four 64-bit integers. For K32, two additional
3989 * codes are allocated, the first with the low nibble doubled. So if the K64
3990 * code is 0x042, the K32 codes would be 0x044 and 0x45.
3991 */
3992 #ifdef __LP64__
3993 void
trace_resource_violation(uint16_t code,struct ledger_entry_info * linfo)3994 trace_resource_violation(uint16_t code,
3995 struct ledger_entry_info *linfo)
3996 {
3997 KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, code),
3998 linfo->lei_balance, linfo->lei_last_refill,
3999 linfo->lei_limit, linfo->lei_refill_period);
4000 }
4001 #else /* K32 */
4002 /* TODO: create/find a trace_two_LLs() for K32 systems */
4003 #define MASK32 0xffffffff
4004 void
trace_resource_violation(uint16_t code,struct ledger_entry_info * linfo)4005 trace_resource_violation(uint16_t code,
4006 struct ledger_entry_info *linfo)
4007 {
4008 int8_t lownibble = (code & 0x3) * 2;
4009 int16_t codeA = (code & 0xffc) | lownibble;
4010 int16_t codeB = codeA + 1;
4011
4012 int32_t balance_high = (linfo->lei_balance >> 32) & MASK32;
4013 int32_t balance_low = linfo->lei_balance & MASK32;
4014 int32_t last_refill_high = (linfo->lei_last_refill >> 32) & MASK32;
4015 int32_t last_refill_low = linfo->lei_last_refill & MASK32;
4016
4017 int32_t limit_high = (linfo->lei_limit >> 32) & MASK32;
4018 int32_t limit_low = linfo->lei_limit & MASK32;
4019 int32_t refill_period_high = (linfo->lei_refill_period >> 32) & MASK32;
4020 int32_t refill_period_low = linfo->lei_refill_period & MASK32;
4021
4022 KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeA),
4023 balance_high, balance_low,
4024 last_refill_high, last_refill_low);
4025 KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeB),
4026 limit_high, limit_low,
4027 refill_period_high, refill_period_low);
4028 }
4029 #endif /* K64/K32 */
4030