1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to [email protected] any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_act.h>
90 #include <mach/thread_status.h>
91 #include <mach/time_value.h>
92 #include <mach/vm_param.h>
93
94 #include <machine/thread.h>
95 #include <machine/pal_routines.h>
96 #include <machine/limits.h>
97
98 #include <kern/kern_types.h>
99 #include <kern/kalloc.h>
100 #include <kern/cpu_data.h>
101 #include <kern/extmod_statistics.h>
102 #include <kern/ipc_mig.h>
103 #include <kern/ipc_tt.h>
104 #include <kern/mach_param.h>
105 #include <kern/machine.h>
106 #include <kern/misc_protos.h>
107 #include <kern/processor.h>
108 #include <kern/queue.h>
109 #include <kern/restartable.h>
110 #include <kern/sched.h>
111 #include <kern/sched_prim.h>
112 #include <kern/syscall_subr.h>
113 #include <kern/task.h>
114 #include <kern/thread.h>
115 #include <kern/thread_group.h>
116 #include <kern/coalition.h>
117 #include <kern/host.h>
118 #include <kern/zalloc.h>
119 #include <kern/assert.h>
120 #include <kern/exc_resource.h>
121 #include <kern/exc_guard.h>
122 #include <kern/telemetry.h>
123 #include <kern/policy_internal.h>
124 #include <kern/turnstile.h>
125 #include <kern/sched_clutch.h>
126 #include <kern/recount.h>
127 #include <kern/smr.h>
128 #include <kern/ast.h>
129 #include <kern/compact_id.h>
130
131 #include <corpses/task_corpse.h>
132 #if KPC
133 #include <kern/kpc.h>
134 #endif
135
136 #if CONFIG_PERVASIVE_CPI
137 #include <kern/monotonic.h>
138 #include <machine/monotonic.h>
139 #endif /* CONFIG_PERVASIVE_CPI */
140
141 #include <ipc/ipc_kmsg.h>
142 #include <ipc/ipc_port.h>
143 #include <bank/bank_types.h>
144
145 #include <vm/vm_kern.h>
146 #include <vm/vm_pageout.h>
147
148 #include <sys/kdebug.h>
149 #include <sys/bsdtask_info.h>
150 #include <mach/sdt.h>
151 #include <san/kasan.h>
152 #include <san/kcov_stksz.h>
153
154 #include <stdatomic.h>
155
156 #if defined(HAS_APPLE_PAC)
157 #include <ptrauth.h>
158 #include <arm64/proc_reg.h>
159 #endif /* defined(HAS_APPLE_PAC) */
160
161 /*
162 * Exported interfaces
163 */
164 #include <mach/task_server.h>
165 #include <mach/thread_act_server.h>
166 #include <mach/mach_host_server.h>
167 #include <mach/host_priv_server.h>
168 #include <mach/mach_voucher_server.h>
169 #include <kern/policy_internal.h>
170
171 #if CONFIG_MACF
172 #include <security/mac_mach_internal.h>
173 #endif
174
175 #include <pthread/workqueue_trace.h>
176
177 LCK_GRP_DECLARE(thread_lck_grp, "thread");
178
179 static SECURITY_READ_ONLY_LATE(zone_t) thread_zone;
180 ZONE_DEFINE_ID(ZONE_ID_THREAD_RO, "threads_ro", struct thread_ro, ZC_READONLY);
181
182 static void thread_port_with_flavor_no_senders(ipc_port_t, mach_port_mscount_t);
183
184 IPC_KOBJECT_DEFINE(IKOT_THREAD_CONTROL);
185 IPC_KOBJECT_DEFINE(IKOT_THREAD_READ,
186 .iko_op_no_senders = thread_port_with_flavor_no_senders);
187 IPC_KOBJECT_DEFINE(IKOT_THREAD_INSPECT,
188 .iko_op_no_senders = thread_port_with_flavor_no_senders);
189
190 static struct mpsc_daemon_queue thread_stack_queue;
191 static struct mpsc_daemon_queue thread_terminate_queue;
192 static struct mpsc_daemon_queue thread_deallocate_queue;
193 static struct mpsc_daemon_queue thread_exception_queue;
194 static struct mpsc_daemon_queue thread_backtrace_queue;
195
196 decl_simple_lock_data(static, crashed_threads_lock);
197 static queue_head_t crashed_threads_queue;
198
/*
 * Deferred EXC_{RESOURCE,GUARD} delivery request.  Allocated by
 * thread_exception_enqueue() and consumed (and freed) by
 * thread_exception_queue_invoke() on the thread_exception_queue daemon.
 */
struct thread_exception_elt {
	struct mpsc_queue_chain link;           /* linkage on thread_exception_queue */
	exception_type_t exception_type;        /* EXC_RESOURCE or EXC_GUARD */
	task_t exception_task;                  /* task ref, consumed by the daemon */
	thread_t exception_thread;              /* thread ref, consumed by the daemon */
};
205
/*
 * Deferred backtrace-exception delivery request.  Allocated by
 * thread_backtrace_enqueue() and consumed (and freed) by
 * thread_backtrace_queue_invoke() on the thread_backtrace_queue daemon.
 */
struct thread_backtrace_elt {
	struct mpsc_queue_chain link;           /* linkage on thread_backtrace_queue */
	exception_type_t exception_type;
	kcdata_object_t obj;                    /* kcdata ref, released after delivery */
	exception_port_t exc_ports[BT_EXC_PORTS_COUNT]; /* send rights */
};
212
/*
 * Prototype thread used to bulk-initialize every new thread via
 * init_thread_from_template().  Machine-dependent fields are filled in
 * later by thread_machine_init_template(); the runnable timer is set up
 * in thread_bootstrap().
 */
static SECURITY_READ_ONLY_LATE(struct thread) thread_template = {
#if MACH_ASSERT
	.thread_magic = THREAD_MAGIC,
#endif /* MACH_ASSERT */
	.wait_result = THREAD_WAITING,
	.options = THREAD_ABORTSAFE,
	.state = TH_WAIT | TH_UNINT,
	.th_sched_bucket = TH_BUCKET_RUN,
	.base_pri = BASEPRI_DEFAULT,
	.realtime.deadline = UINT64_MAX,
	.last_made_runnable_time = THREAD_NOT_RUNNABLE,
	.last_basepri_change_time = THREAD_NOT_RUNNABLE,
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	.pri_shift = INT8_MAX,
#endif
	/* timers are initialized in thread_bootstrap */
};
230
231 #define CTID_SIZE_BIT 20
232 #define CTID_MASK ((1u << CTID_SIZE_BIT) - 1)
233 #define CTID_MAX_THREAD_NUMBER (CTID_MASK - 1)
234 static_assert(CTID_MAX_THREAD_NUMBER <= COMPACT_ID_MAX);
235
236 #ifndef __LITTLE_ENDIAN__
237 #error "ctid relies on the ls bits of uint32_t to be populated"
238 #endif
239
240 __startup_data
241 static struct thread init_thread;
242 static SECURITY_READ_ONLY_LATE(uint32_t) ctid_nonce;
243 COMPACT_ID_TABLE_DEFINE(static, ctid_table);
244
/*
 * thread_zone_startup:
 *
 * Create the zone backing struct thread allocations.  Registered via
 * STARTUP() to run during the ZALLOC startup phase.
 */
__startup_func
static void
thread_zone_startup(void)
{
	size_t size = sizeof(struct thread);

#ifdef MACH_BSD
	/* Reserve space for the BSD uthread co-allocated after the Mach thread. */
	size += roundup(uthread_size, _Alignof(struct thread));
#endif
	thread_zone = zone_create_ext("threads", size,
	    ZC_SEQUESTER | ZC_ZFREE_CLEARMEM, ZONE_ID_THREAD, NULL);
}
STARTUP(ZALLOC, STARTUP_RANK_FOURTH, thread_zone_startup);
258
259 static void thread_deallocate_enqueue(thread_t thread);
260 static void thread_deallocate_complete(thread_t thread);
261
262 static void ctid_table_remove(thread_t thread);
263 static void ctid_table_add(thread_t thread);
264 static void ctid_table_init(void);
265
266 #ifdef MACH_BSD
267 extern void proc_exit(void *);
268 extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
269 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
270 extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
271 extern uint64_t get_wq_quantum_offset_from_proc(void *);
272 extern int proc_selfpid(void);
273 extern void proc_name(int, char*, int);
274 extern char * proc_name_address(void *p);
275 exception_type_t get_exception_from_corpse_crashinfo(kcdata_descriptor_t corpse_info);
276 extern void kdebug_proc_name_args(struct proc *proc, long args[static 4]);
277 #endif /* MACH_BSD */
278
279 extern bool bsdthread_part_of_cooperative_workqueue(struct uthread *uth);
280 extern int disable_exc_resource;
281 extern int audio_active;
282 extern int debug_task;
283 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
284 int task_threadmax = CONFIG_THREAD_MAX;
285
286 static uint64_t thread_unique_id = 100;
287
288 struct _thread_ledger_indices thread_ledgers = { .cpu_time = -1 };
289 static ledger_template_t thread_ledger_template = NULL;
290 static void init_thread_ledgers(void);
291
292 #if CONFIG_JETSAM
293 void jetsam_on_ledger_cpulimit_exceeded(void);
294 #endif
295
296 extern int task_thread_soft_limit;
297
298 #if DEVELOPMENT || DEBUG
299 extern int exc_resource_threads_enabled;
300 #endif /* DEVELOPMENT || DEBUG */
301
302 /*
303 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
304 *
305 * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user
306 * stacktraces, aka micro-stackshots)
307 */
308 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
309
310 /* Percentage. Level at which we start gathering telemetry. */
311 static TUNABLE(uint8_t, cpumon_ustackshots_trigger_pct,
312 "cpumon_ustackshots_trigger_pct", CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT);
313 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
314 #if DEVELOPMENT || DEBUG
315 void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
316 #endif /* DEVELOPMENT || DEBUG */
317
318 /*
319 * The smallest interval over which we support limiting CPU consumption is 1ms
320 */
321 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
322
323 os_refgrp_decl(static, thread_refgrp, "thread", NULL);
324
/*
 * init_thread_from_template:
 *
 * Bitwise-copy the shared thread_template into a freshly allocated
 * thread, establishing its default field values.
 */
static inline void
init_thread_from_template(thread_t thread)
{
	/*
	 * In general, struct thread isn't trivially-copyable, since it may
	 * contain pointers to thread-specific state. This may be enforced at
	 * compile time on architectures that store authed + diversified
	 * pointers in machine_thread.
	 *
	 * In this specific case, where we're initializing a new thread from a
	 * thread_template, we know all diversified pointers are NULL; these are
	 * safe to bitwise copy.
	 */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnontrivial-memaccess"
	memcpy(thread, &thread_template, sizeof(*thread));
#pragma clang diagnostic pop
}
343
/*
 * thread_ro_create:
 *
 * Allocate the read-only companion structure for a new thread and
 * publish the caller's template into it via the read-only zone
 * update path.
 */
static void
thread_ro_create(task_t parent_task, thread_t th, thread_ro_t tro_tpl)
{
#if __x86_64__
	th->t_task = parent_task;
#endif
	tro_tpl->tro_owner = th;
	tro_tpl->tro_task = parent_task;
	/* zalloc_ro elements are only writable through zalloc_ro_update_*() */
	th->t_tro = zalloc_ro(ZONE_ID_THREAD_RO, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	zalloc_ro_update_elem(ZONE_ID_THREAD_RO, th->t_tro, tro_tpl);
}
355
/*
 * thread_ro_destroy:
 *
 * Free a thread's read-only companion structure, dropping the
 * BSD credential reference it held (if any).
 */
static void
thread_ro_destroy(thread_t th)
{
	thread_ro_t tro = get_thread_ro(th);
#if MACH_BSD
	/* capture the cred before the element is freed below */
	struct ucred *cred = tro->tro_cred;
#endif

	zfree_ro(ZONE_ID_THREAD_RO, tro);
#if MACH_BSD
	if (cred) {
		uthread_cred_free(cred);
	}
#endif
}
371
372 #if MACH_BSD
373 extern void kauth_cred_set(struct ucred **, struct ucred *);
374
/*
 * thread_ro_update_cred:
 *
 * Swap the thread's cached BSD credential to @ucred (no-op if already
 * equal).  kauth_cred_set() manages the reference counts; the new
 * pointer is then published through the read-only zone update path.
 */
void
thread_ro_update_cred(thread_ro_t tro, struct ucred *ucred)
{
	struct ucred *my_cred = tro->tro_cred;
	if (my_cred != ucred) {
		kauth_cred_set(&my_cred, ucred);
		zalloc_ro_update_field(ZONE_ID_THREAD_RO, tro, tro_cred, &my_cred);
	}
}
384
385 void
thread_ro_update_flags(thread_ro_t tro,thread_ro_flags_t add,thread_ro_flags_t clr)386 thread_ro_update_flags(thread_ro_t tro, thread_ro_flags_t add, thread_ro_flags_t clr)
387 {
388 thread_ro_flags_t flags = (tro->tro_flags & ~clr) | add;
389 zalloc_ro_update_field(ZONE_ID_THREAD_RO, tro, tro_flags, &flags);
390 }
391 #endif
392
/*
 * thread_bootstrap:
 *
 * Early-boot creation of the very first thread.  Finishes the shared
 * thread_template and returns the statically allocated init_thread
 * for the boot processor to run on.
 */
__startup_func
thread_t
thread_bootstrap(void)
{
	/*
	 * Fill in a template thread for fast initialization.
	 */
	timer_init(&thread_template.runnable_timer);

	init_thread_from_template(&init_thread);
	/* fiddle with init thread to skip asserts in set_sched_pri */
	init_thread.sched_pri = MAXPRI_KERNEL;

	/*
	 * We can't quite use ctid yet, on ARM thread_bootstrap() is called
	 * before we can call random or anything,
	 * so we just make it barely work and it will get fixed up
	 * when the first thread is actually made.
	 */
	*compact_id_resolve(&ctid_table, 0) = &init_thread;
	init_thread.ctid = CTID_MASK;

	return &init_thread;
}
417
/*
 * thread_machine_init_template:
 *
 * Fill in the machine-dependent portion of the shared thread template,
 * once the machine layer is far enough along to do so.
 */
void
thread_machine_init_template(void)
{
	machine_thread_template_init(&thread_template);
}
423
/*
 * thread_init:
 *
 * Module initialization for the thread subsystem: machine-dependent
 * per-thread state, then the thread ledger template.
 */
void
thread_init(void)
{
	/*
	 * Initialize any machine-dependent
	 * per-thread structures necessary.
	 */
	machine_thread_init();

	init_thread_ledgers();
}
435
436 boolean_t
thread_is_active(thread_t thread)437 thread_is_active(thread_t thread)
438 {
439 return thread->active;
440 }
441
/*
 * thread_corpse_continue:
 *
 * Continuation run by threads of a corpse task: terminate the calling
 * thread in place rather than letting it return to userspace.
 * Never returns.
 */
void
thread_corpse_continue(void)
{
	thread_t thread = current_thread();

	thread_terminate_internal(thread);

	/*
	 * Handle the thread termination directly
	 * here instead of returning to userspace.
	 */
	assert(thread->active == FALSE);
	/* clear the pending APC AST and run its handler synchronously */
	thread_ast_clear(thread, AST_APC);
	thread_apc_ast(thread);

	panic("thread_corpse_continue");
	/*NOTREACHED*/
}
460
/*
 * thread_terminate_continue:
 *
 * Continuation installed by thread_terminate_self()'s final block.
 * A terminating thread must never run again, so reaching here is fatal.
 */
__dead2
static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}
468
469 /*
470 * thread_terminate_self:
471 */
472 void
thread_terminate_self(void)473 thread_terminate_self(void)
474 {
475 thread_t thread = current_thread();
476 thread_ro_t tro = get_thread_ro(thread);
477 task_t task = tro->tro_task;
478 void *bsd_info = get_bsdtask_info(task);
479 int threadcnt;
480
481 pal_thread_terminate_self(thread);
482
483 DTRACE_PROC(lwp__exit);
484
485 thread_mtx_lock(thread);
486
487 ipc_thread_disable(thread);
488
489 thread_mtx_unlock(thread);
490
491 thread_sched_call(thread, NULL);
492
493 spl_t s = splsched();
494 thread_lock(thread);
495
496 thread_depress_abort_locked(thread);
497
498 /*
499 * Before we take the thread_lock right above,
500 * act_set_ast_reset_pcs() might not yet observe
501 * that the thread is inactive, and could have
502 * requested an IPI Ack.
503 *
504 * Once we unlock the thread, we know that
505 * act_set_ast_reset_pcs() can't fail to notice
506 * that thread->active is false,
507 * and won't set new ones.
508 */
509 thread_reset_pcs_ack_IPI(thread);
510
511 thread_unlock(thread);
512
513 splx(s);
514
515 #if CONFIG_TASKWATCH
516 thead_remove_taskwatch(thread);
517 #endif /* CONFIG_TASKWATCH */
518
519 work_interval_thread_terminate(thread);
520
521 thread_mtx_lock(thread);
522
523 thread_policy_reset(thread);
524
525 thread_mtx_unlock(thread);
526
527 assert(thread->th_work_interval == NULL);
528
529 bank_swap_thread_bank_ledger(thread, NULL);
530
531 if (kdebug_enable && bsd_hasthreadname(get_bsdthread_info(thread))) {
532 char threadname[MAXTHREADNAMESIZE];
533 bsd_getthreadname(get_bsdthread_info(thread), threadname);
534 kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
535 }
536
537 uthread_cleanup(get_bsdthread_info(thread), tro);
538
539 if (kdebug_enable && bsd_info && !task_is_exec_copy(task)) {
540 /* trace out pid before we sign off */
541 long dbg_arg1 = 0;
542 long dbg_arg2 = 0;
543
544 kdbg_trace_data(get_bsdtask_info(task), &dbg_arg1, &dbg_arg2);
545 #if CONFIG_PERVASIVE_CPI
546 if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_THR_EXIT)) {
547 struct recount_usage usage = { 0 };
548 struct recount_usage perf_only = { 0 };
549 boolean_t intrs_end = ml_set_interrupts_enabled(FALSE);
550 recount_current_thread_usage_perf_only(&usage, &perf_only);
551 ml_set_interrupts_enabled(intrs_end);
552 KDBG_RELEASE(DBG_MT_INSTRS_CYCLES_THR_EXIT,
553 usage.ru_instructions,
554 usage.ru_cycles,
555 usage.ru_system_time_mach,
556 usage.ru_user_time_mach);
557 #if __AMP__
558 KDBG_RELEASE(DBG_MT_P_INSTRS_CYCLES_THR_EXIT,
559 perf_only.ru_instructions,
560 perf_only.ru_cycles,
561 perf_only.ru_system_time_mach,
562 perf_only.ru_user_time_mach);
563
564 #endif // __AMP__
565 }
566 #endif/* CONFIG_PERVASIVE_CPI */
567 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
568 }
569
570 /*
571 * After this subtraction, this thread should never access
572 * task->bsd_info unless it got 0 back from the os_atomic_dec. It
573 * could be racing with other threads to be the last thread in the
574 * process, and the last thread in the process will tear down the proc
575 * structure and zero-out task->bsd_info.
576 */
577 threadcnt = os_atomic_dec(&task->active_thread_count, relaxed);
578
579 #if CONFIG_COALITIONS
580 /*
581 * Leave the coalitions when last thread of task is exiting and the
582 * task is not a corpse.
583 */
584 if (threadcnt == 0 && !task->corpse_info) {
585 coalitions_remove_task(task);
586 }
587 #endif
588
589 /*
590 * If we are the last thread to terminate and the task is
591 * associated with a BSD process, perform BSD process exit.
592 */
593 if (threadcnt == 0 && bsd_info != NULL) {
594 mach_exception_data_type_t subcode = 0;
595 if (kdebug_enable) {
596 /* since we're the last thread in this process, trace out the command name too */
597 long args[4] = { 0 };
598 kdebug_proc_name_args(bsd_info, args);
599 #if CONFIG_PERVASIVE_CPI
600 if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_PROC_EXIT)) {
601 struct recount_usage usage = { 0 };
602 struct recount_usage perf_only = { 0 };
603 recount_current_task_usage_perf_only(&usage, &perf_only);
604 KDBG_RELEASE(DBG_MT_INSTRS_CYCLES_PROC_EXIT,
605 usage.ru_instructions,
606 usage.ru_cycles,
607 usage.ru_system_time_mach,
608 usage.ru_user_time_mach);
609 #if __AMP__
610 KDBG_RELEASE(DBG_MT_P_INSTRS_CYCLES_PROC_EXIT,
611 perf_only.ru_instructions,
612 perf_only.ru_cycles,
613 perf_only.ru_system_time_mach,
614 perf_only.ru_user_time_mach);
615 #endif // __AMP__
616 }
617 #endif/* CONFIG_PERVASIVE_CPI */
618 KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
619 }
620
621 /* Get the exit reason before proc_exit */
622 subcode = proc_encode_exit_exception_code(bsd_info);
623 proc_exit(bsd_info);
624 bsd_info = NULL;
625 /*
626 * if there is crash info in task
627 * then do the deliver action since this is
628 * last thread for this task.
629 */
630 if (task->corpse_info) {
631 /* reset all except task name port */
632 ipc_task_reset(task);
633 /* enable all task ports (name port unchanged) */
634 ipc_task_enable(task);
635 exception_type_t etype = get_exception_from_corpse_crashinfo(task->corpse_info);
636 task_deliver_crash_notification(task, current_thread(), etype, subcode);
637 }
638 }
639
640 if (threadcnt == 0) {
641 task_lock(task);
642 if (task_is_a_corpse_fork(task)) {
643 thread_wakeup((event_t)&task->active_thread_count);
644 }
645 task_unlock(task);
646 }
647
648 s = splsched();
649 thread_lock(thread);
650
651 /*
652 * Ensure that the depress timer is no longer enqueued,
653 * so the timer can be safely deallocated
654 *
655 * TODO: build timer_call_cancel_wait
656 */
657
658 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);
659
660 uint32_t delay_us = 1;
661
662 while (thread->depress_timer_active > 0) {
663 thread_unlock(thread);
664 splx(s);
665
666 delay(delay_us++);
667
668 if (delay_us > USEC_PER_SEC) {
669 panic("depress timer failed to inactivate!"
670 "thread: %p depress_timer_active: %d",
671 thread, thread->depress_timer_active);
672 }
673
674 s = splsched();
675 thread_lock(thread);
676 }
677
678 /*
679 * Cancel wait timer, and wait for
680 * concurrent expirations.
681 */
682 if (thread->wait_timer_is_set) {
683 thread->wait_timer_is_set = FALSE;
684
685 if (timer_call_cancel(thread->wait_timer)) {
686 thread->wait_timer_active--;
687 }
688 }
689
690 delay_us = 1;
691
692 while (thread->wait_timer_active > 0) {
693 thread_unlock(thread);
694 splx(s);
695
696 delay(delay_us++);
697
698 if (delay_us > USEC_PER_SEC) {
699 panic("wait timer failed to inactivate!"
700 "thread: %p wait_timer_active: %d",
701 thread, thread->wait_timer_active);
702 }
703
704 s = splsched();
705 thread_lock(thread);
706 }
707
708 /*
709 * If there is a reserved stack, release it.
710 */
711 if (thread->reserved_stack != 0) {
712 stack_free_reserved(thread);
713 thread->reserved_stack = 0;
714 }
715
716 /*
717 * Mark thread as terminating, and block.
718 */
719 thread->state |= TH_TERMINATE;
720 thread_mark_wait_locked(thread, THREAD_UNINT);
721
722 assert(thread->th_work_interval_flags == TH_WORK_INTERVAL_FLAGS_NONE);
723 assert(thread->kern_promotion_schedpri == 0);
724 if (thread->rwlock_count > 0) {
725 panic("rwlock_count is %d for thread %p, possibly it still holds a rwlock", thread->rwlock_count, thread);
726 }
727 assert(thread->priority_floor_count == 0);
728 assert(thread->handoff_thread == THREAD_NULL);
729 assert(thread->th_work_interval == NULL);
730 assert(thread->t_rr_state.trr_value == 0);
731
732 assert3u(0, ==, thread->sched_flags &
733 (TH_SFLAG_WAITQ_PROMOTED |
734 TH_SFLAG_RW_PROMOTED |
735 TH_SFLAG_EXEC_PROMOTED |
736 TH_SFLAG_FLOOR_PROMOTED |
737 TH_SFLAG_PROMOTED |
738 TH_SFLAG_DEPRESS));
739
740 thread_unlock(thread);
741 /* splsched */
742
743 thread_block((thread_continue_t)thread_terminate_continue);
744 /*NOTREACHED*/
745 }
746
747 static bool
thread_ref_release(thread_t thread)748 thread_ref_release(thread_t thread)
749 {
750 if (thread == THREAD_NULL) {
751 return false;
752 }
753
754 assert_thread_magic(thread);
755
756 return os_ref_release_raw(&thread->ref_count, &thread_refgrp) == 0;
757 }
758
/* Drop a thread refcount safely without triggering a zfree */
void
thread_deallocate_safe(thread_t thread)
{
	/*
	 * Safe variant for contexts that cannot block or free memory:
	 * if this was the last reference, hand the thread to the
	 * deallocate daemon instead of tearing it down inline.
	 */
	if (__improbable(thread_ref_release(thread))) {
		/* enqueue the thread for thread deallocate deamon to call thread_deallocate_complete */
		thread_deallocate_enqueue(thread);
	}
}
768
/*
 * thread_deallocate:
 *
 * Drop a thread reference; if it was the last one, complete the
 * thread's destruction inline (may block / free memory).
 */
void
thread_deallocate(thread_t thread)
{
	if (__improbable(thread_ref_release(thread))) {
		thread_deallocate_complete(thread);
	}
}
776
/*
 * thread_deallocate_complete:
 *
 * Final destruction of a thread whose last reference has been dropped.
 * Tears down KPC, IPC, QoS, the BSD uthread, ledgers, turnstile,
 * voucher, kernel stack and machine state, drops the task reference,
 * unlinks the thread from the terminated list, and returns the memory
 * to the thread zone.  The thread must be fully terminated
 * (TH_TERMINATE2 set) and off any run queue.
 */
void
thread_deallocate_complete(
	thread_t thread)
{
	task_t task;

	assert_thread_magic(thread);

	assert(os_ref_get_count_raw(&thread->ref_count) == 0);

	if (!(thread->state & TH_TERMINATE2)) {
		panic("thread_deallocate: thread not properly terminated");
	}

	assert(thread->runq == PROCESSOR_NULL);

#if KPC
	kpc_thread_destroy(thread);
#endif /* KPC */

	ipc_thread_terminate(thread);

	proc_thread_qos_deallocate(thread);

	/* capture the task before the uthread (which caches it) is destroyed */
	task = get_threadtask(thread);

#ifdef MACH_BSD
	uthread_destroy(get_bsdthread_info(thread));
#endif /* MACH_BSD */

	if (thread->t_ledger) {
		ledger_dereference(thread->t_ledger);
	}
	if (thread->t_threadledger) {
		ledger_dereference(thread->t_threadledger);
	}

	assert(thread->turnstile != TURNSTILE_NULL);
	if (thread->turnstile) {
		turnstile_deallocate(thread->turnstile);
	}
	turnstile_compact_id_put(thread->ctsid);

	if (IPC_VOUCHER_NULL != thread->ith_voucher) {
		ipc_voucher_release(thread->ith_voucher);
	}

	kfree_data(thread->thread_io_stats, sizeof(struct io_stat_info));
#if CONFIG_PREADOPT_TG
	if (thread->old_preadopt_thread_group) {
		thread_group_release(thread->old_preadopt_thread_group);
	}

	if (thread->preadopt_thread_group) {
		thread_group_release(thread->preadopt_thread_group);
	}
#endif /* CONFIG_PREADOPT_TG */

	if (thread->kernel_stack != 0) {
		stack_free(thread);
	}

	recount_thread_deinit(&thread->th_recount);

	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	machine_thread_destroy(thread);

	task_deallocate_grp(task, TASK_GRP_INTERNAL);

#if MACH_ASSERT
	/* poison the magic so stale pointers are caught */
	assert_thread_magic(thread);
	thread->thread_magic = 0;
#endif /* MACH_ASSERT */

	lck_mtx_lock(&tasks_threads_lock);
	assert(terminated_threads_count > 0);
	queue_remove(&terminated_threads, thread, thread_t, threads);
	terminated_threads_count--;
	lck_mtx_unlock(&tasks_threads_lock);

	timer_call_free(thread->depress_timer);
	timer_call_free(thread->wait_timer);

	ctid_table_remove(thread);

	thread_ro_destroy(thread);
	zfree(thread_zone, thread);
}
865
866 /*
867 * thread_inspect_deallocate:
868 *
869 * Drop a thread inspection reference.
870 */
871 void
thread_inspect_deallocate(thread_inspect_t thread_inspect)872 thread_inspect_deallocate(
873 thread_inspect_t thread_inspect)
874 {
875 return thread_deallocate((thread_t)thread_inspect);
876 }
877
878 /*
879 * thread_read_deallocate:
880 *
881 * Drop a reference on thread read port.
882 */
883 void
thread_read_deallocate(thread_read_t thread_read)884 thread_read_deallocate(
885 thread_read_t thread_read)
886 {
887 return thread_deallocate((thread_t)thread_read);
888 }
889
890
891 /*
892 * thread_exception_queue_invoke:
893 *
894 * Deliver EXC_{RESOURCE,GUARD} exception
895 */
896 static void
thread_exception_queue_invoke(mpsc_queue_chain_t elm,__assert_only mpsc_daemon_queue_t dq)897 thread_exception_queue_invoke(mpsc_queue_chain_t elm,
898 __assert_only mpsc_daemon_queue_t dq)
899 {
900 struct thread_exception_elt *elt;
901 task_t task;
902 thread_t thread;
903 exception_type_t etype;
904
905 assert(dq == &thread_exception_queue);
906 elt = mpsc_queue_element(elm, struct thread_exception_elt, link);
907
908 etype = elt->exception_type;
909 task = elt->exception_task;
910 thread = elt->exception_thread;
911 assert_thread_magic(thread);
912
913 kfree_type(struct thread_exception_elt, elt);
914
915 /* wait for all the threads in the task to terminate */
916 task_lock(task);
917 task_wait_till_threads_terminate_locked(task);
918 task_unlock(task);
919
920 /* Consumes the task ref returned by task_generate_corpse_internal */
921 task_deallocate(task);
922 /* Consumes the thread ref returned by task_generate_corpse_internal */
923 thread_deallocate(thread);
924
925 /* Deliver the notification, also clears the corpse. */
926 task_deliver_crash_notification(task, thread, etype, 0);
927 }
928
/*
 * thread_backtrace_queue_invoke:
 *
 * Daemon callback for thread_backtrace_queue: delivers a queued
 * backtrace kcdata object to the recorded exception ports, then
 * releases the send rights and the kcdata reference.  Frees the
 * queued element.
 */
static void
thread_backtrace_queue_invoke(mpsc_queue_chain_t elm,
    __assert_only mpsc_daemon_queue_t dq)
{
	struct thread_backtrace_elt *elt;
	kcdata_object_t obj;
	exception_port_t exc_ports[BT_EXC_PORTS_COUNT]; /* send rights */
	exception_type_t etype;

	assert(dq == &thread_backtrace_queue);
	elt = mpsc_queue_element(elm, struct thread_backtrace_elt, link);

	/* copy everything out before the element is freed */
	obj = elt->obj;
	memcpy(exc_ports, elt->exc_ports, sizeof(ipc_port_t) * BT_EXC_PORTS_COUNT);
	etype = elt->exception_type;

	kfree_type(struct thread_backtrace_elt, elt);

	/* Deliver to backtrace exception ports */
	exception_deliver_backtrace(obj, exc_ports, etype);

	/*
	 * Release port right and kcdata object refs given by
	 * task_enqueue_exception_with_corpse()
	 */

	for (unsigned int i = 0; i < BT_EXC_PORTS_COUNT; i++) {
		ipc_port_release_send(exc_ports[i]);
	}

	kcdata_object_release(obj);
}
961
962 /*
963 * thread_exception_enqueue:
964 *
965 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
966 */
967 void
thread_exception_enqueue(task_t task,thread_t thread,exception_type_t etype)968 thread_exception_enqueue(
969 task_t task,
970 thread_t thread,
971 exception_type_t etype)
972 {
973 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
974 struct thread_exception_elt *elt = kalloc_type(struct thread_exception_elt, Z_WAITOK | Z_NOFAIL);
975 elt->exception_type = etype;
976 elt->exception_task = task;
977 elt->exception_thread = thread;
978
979 mpsc_daemon_enqueue(&thread_exception_queue, &elt->link,
980 MPSC_QUEUE_DISABLE_PREEMPTION);
981 }
982
983 void
thread_backtrace_enqueue(kcdata_object_t obj,exception_port_t ports[static BT_EXC_PORTS_COUNT],exception_type_t etype)984 thread_backtrace_enqueue(
985 kcdata_object_t obj,
986 exception_port_t ports[static BT_EXC_PORTS_COUNT],
987 exception_type_t etype)
988 {
989 struct thread_backtrace_elt *elt = kalloc_type(struct thread_backtrace_elt, Z_WAITOK | Z_NOFAIL);
990 elt->obj = obj;
991 elt->exception_type = etype;
992
993 memcpy(elt->exc_ports, ports, sizeof(ipc_port_t) * BT_EXC_PORTS_COUNT);
994
995 mpsc_daemon_enqueue(&thread_backtrace_queue, &elt->link,
996 MPSC_QUEUE_DISABLE_PREEMPTION);
997 }
998
999 /*
1000 * thread_copy_resource_info
1001 *
1002 * Copy the resource info counters from source
1003 * thread to destination thread.
1004 */
1005 void
thread_copy_resource_info(thread_t dst_thread,thread_t src_thread)1006 thread_copy_resource_info(
1007 thread_t dst_thread,
1008 thread_t src_thread)
1009 {
1010 dst_thread->c_switch = src_thread->c_switch;
1011 dst_thread->p_switch = src_thread->p_switch;
1012 dst_thread->ps_switch = src_thread->ps_switch;
1013 dst_thread->sched_time_save = src_thread->sched_time_save;
1014 dst_thread->runnable_timer = src_thread->runnable_timer;
1015 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
1016 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
1017 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
1018 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
1019 dst_thread->syscalls_unix = src_thread->syscalls_unix;
1020 dst_thread->syscalls_mach = src_thread->syscalls_mach;
1021 ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
1022 recount_thread_copy(&dst_thread->th_recount, &src_thread->th_recount);
1023 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
1024 }
1025
/*
 * thread_terminate_queue_invoke:
 *
 * Final reaping of a terminated thread, run by the deallocate daemon:
 * rolls the dying thread's accounting into its task, unlinks it from
 * the task and global thread lists, wakes any pthread joiner, and
 * drops the termination reference.
 */
static void
thread_terminate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq)
{
	thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
	task_t task = get_threadtask(thread);

	assert(dq == &thread_terminate_queue);

	task_lock(task);

	/*
	 * if marked for crash reporting, skip reaping.
	 * The corpse delivery thread will clear bit and enqueue
	 * for reaping when done
	 *
	 * Note: the inspection field is set under the task lock
	 *
	 * FIXME[mad]: why enqueue for termination before `inspection` is false ?
	 */
	if (__improbable(thread->inspection)) {
		/* crashed_threads_lock is taken while still holding the task lock. */
		simple_lock(&crashed_threads_lock, &thread_lck_grp);
		task_unlock(task);

		enqueue_tail(&crashed_threads_queue, &thread->runq_links);
		simple_unlock(&crashed_threads_lock);
		return;
	}

	/* Fold the thread's usage counters into the task-level totals. */
	recount_task_rollup_thread(&task->tk_recount, &thread->th_recount);

	task->total_runnable_time += timer_grab(&thread->runnable_timer);
	task->c_switch += thread->c_switch;
	task->p_switch += thread->p_switch;
	task->ps_switch += thread->ps_switch;

	task->syscalls_unix += thread->syscalls_unix;
	task->syscalls_mach += thread->syscalls_mach;

	task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
	task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
	task->task_gpu_ns += ml_gpu_stat(thread);
	task->decompressions += thread->decompressions;

	thread_update_qos_cpu_time(thread);

	queue_remove(&task->threads, thread, thread_t, task_threads);
	task->thread_count--;

	/*
	 * If the task is being halted, and there is only one thread
	 * left in the task after this one, then wakeup that thread.
	 */
	if (task->thread_count == 1 && task->halting) {
		thread_wakeup((event_t)&task->halting);
	}

	task_unlock(task);

	/* Move the thread from the global live list to the terminated list. */
	lck_mtx_lock(&tasks_threads_lock);
	queue_remove(&threads, thread, thread_t, threads);
	threads_count--;
	queue_enter(&terminated_threads, thread, thread_t, threads);
	terminated_threads_count++;
	lck_mtx_unlock(&tasks_threads_lock);

#if MACH_BSD
	/*
	 * The thread no longer counts against the task's thread count,
	 * we can now wake up any pending joiner.
	 *
	 * Note that the inheritor will be set to `thread` which is
	 * incorrect once it is on the termination queue, however
	 * the termination queue runs at MINPRI_KERNEL which is higher
	 * than any user thread, so this isn't a priority inversion.
	 */
	if (thread_get_tag(thread) & THREAD_TAG_USER_JOIN) {
		struct uthread *uth = get_bsdthread_info(thread);
		mach_port_name_t kport = uthread_joiner_port(uth);

		/*
		 * Clear the port low two bits to tell pthread that thread is gone.
		 */
#ifndef NO_PORT_GEN
		kport &= ~MACH_PORT_MAKE(0, IE_BITS_GEN_MASK + IE_BITS_GEN_ONE);
#else
		kport |= MACH_PORT_MAKE(0, ~(IE_BITS_GEN_MASK + IE_BITS_GEN_ONE));
#endif
		/* Publish the cleared name at the joiner's userspace address. */
		(void)copyoutmap_atomic32(task->map, kport,
		    uthread_joiner_address(uth));
		uthread_joiner_wake(task, uth);
	}
#endif

	/* Drop the reference held for the termination queue. */
	thread_deallocate(thread);
}
1122
1123 static void
thread_deallocate_queue_invoke(mpsc_queue_chain_t e,__assert_only mpsc_daemon_queue_t dq)1124 thread_deallocate_queue_invoke(mpsc_queue_chain_t e,
1125 __assert_only mpsc_daemon_queue_t dq)
1126 {
1127 thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
1128
1129 assert(dq == &thread_deallocate_queue);
1130
1131 thread_deallocate_complete(thread);
1132 }
1133
1134 /*
1135 * thread_terminate_enqueue:
1136 *
1137 * Enqueue a terminating thread for final disposition.
1138 *
1139 * Called at splsched.
1140 */
1141 void
thread_terminate_enqueue(thread_t thread)1142 thread_terminate_enqueue(
1143 thread_t thread)
1144 {
1145 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
1146
1147 mpsc_daemon_enqueue(&thread_terminate_queue, &thread->mpsc_links,
1148 MPSC_QUEUE_DISABLE_PREEMPTION);
1149 }
1150
1151 /*
1152 * thread_deallocate_enqueue:
1153 *
1154 * Enqueue a thread for final deallocation.
1155 */
1156 static void
thread_deallocate_enqueue(thread_t thread)1157 thread_deallocate_enqueue(
1158 thread_t thread)
1159 {
1160 mpsc_daemon_enqueue(&thread_deallocate_queue, &thread->mpsc_links,
1161 MPSC_QUEUE_DISABLE_PREEMPTION);
1162 }
1163
1164 /*
1165 * thread_terminate_crashed_threads:
1166 * walk the list of crashed threads and put back set of threads
1167 * who are no longer being inspected.
1168 */
1169 void
thread_terminate_crashed_threads(void)1170 thread_terminate_crashed_threads(void)
1171 {
1172 thread_t th_remove;
1173
1174 simple_lock(&crashed_threads_lock, &thread_lck_grp);
1175 /*
1176 * loop through the crashed threads queue
1177 * to put any threads that are not being inspected anymore
1178 */
1179
1180 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
1181 /* make sure current_thread is never in crashed queue */
1182 assert(th_remove != current_thread());
1183
1184 if (th_remove->inspection == FALSE) {
1185 remqueue(&th_remove->runq_links);
1186 mpsc_daemon_enqueue(&thread_terminate_queue, &th_remove->mpsc_links,
1187 MPSC_QUEUE_NONE);
1188 }
1189 }
1190
1191 simple_unlock(&crashed_threads_lock);
1192 }
1193
1194 /*
1195 * thread_stack_queue_invoke:
1196 *
1197 * Perform stack allocation as required due to
1198 * invoke failures.
1199 */
1200 static void
thread_stack_queue_invoke(mpsc_queue_chain_t elm,__assert_only mpsc_daemon_queue_t dq)1201 thread_stack_queue_invoke(mpsc_queue_chain_t elm,
1202 __assert_only mpsc_daemon_queue_t dq)
1203 {
1204 thread_t thread = mpsc_queue_element(elm, struct thread, mpsc_links);
1205
1206 assert(dq == &thread_stack_queue);
1207
1208 /* allocate stack with interrupts enabled so that we can call into VM */
1209 stack_alloc(thread);
1210
1211 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1212
1213 spl_t s = splsched();
1214 thread_lock(thread);
1215 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1216 thread_unlock(thread);
1217 splx(s);
1218 }
1219
1220 /*
1221 * thread_stack_enqueue:
1222 *
1223 * Enqueue a thread for stack allocation.
1224 *
1225 * Called at splsched.
1226 */
1227 void
thread_stack_enqueue(thread_t thread)1228 thread_stack_enqueue(
1229 thread_t thread)
1230 {
1231 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1232 assert_thread_magic(thread);
1233
1234 mpsc_daemon_enqueue(&thread_stack_queue, &thread->mpsc_links,
1235 MPSC_QUEUE_DISABLE_PREEMPTION);
1236 }
1237
/*
 * thread_daemon_init:
 *
 * Boot-time initialization of the thread service daemons: the shared
 * deallocate daemon (terminate + deallocate queues), and dedicated
 * daemon threads for stack allocation, exception delivery, and
 * backtrace delivery.
 */
void
thread_daemon_init(void)
{
	kern_return_t result;

	thread_deallocate_daemon_init();

	/* Both reaping queues are serviced by the shared deallocate daemon. */
	thread_deallocate_daemon_register_queue(&thread_terminate_queue,
	    thread_terminate_queue_invoke);

	thread_deallocate_daemon_register_queue(&thread_deallocate_queue,
	    thread_deallocate_queue_invoke);

	smr_register_mpsc_queue();

	ipc_object_deallocate_register_queue();

	simple_lock_init(&crashed_threads_lock, 0);
	queue_init(&crashed_threads_queue);

	/* Stack daemon runs above MINPRI_KERNEL so stack waits resolve quickly. */
	result = mpsc_daemon_queue_init_with_thread(&thread_stack_queue,
	    thread_stack_queue_invoke, BASEPRI_PREEMPT_HIGH,
	    "daemon.thread-stack", MPSC_DAEMON_INIT_NONE);
	if (result != KERN_SUCCESS) {
		panic("thread_daemon_init: thread_stack_daemon");
	}

	result = mpsc_daemon_queue_init_with_thread(&thread_exception_queue,
	    thread_exception_queue_invoke, MINPRI_KERNEL,
	    "daemon.thread-exception", MPSC_DAEMON_INIT_NONE);

	if (result != KERN_SUCCESS) {
		panic("thread_daemon_init: thread_exception_daemon");
	}

	result = mpsc_daemon_queue_init_with_thread(&thread_backtrace_queue,
	    thread_backtrace_queue_invoke, MINPRI_KERNEL,
	    "daemon.thread-backtrace", MPSC_DAEMON_INIT_NONE);

	if (result != KERN_SUCCESS) {
		panic("thread_daemon_init: thread_backtrace_daemon");
	}
}
1281
/* Flags controlling thread_create_internal() behavior. */
__options_decl(thread_create_internal_options_t, uint32_t, {
	TH_OPTION_NONE = 0x00,
	TH_OPTION_NOSUSP = 0x02,      /* fail creation if the parent task is suspended */
	TH_OPTION_WORKQ = 0x04,       /* workqueue thread: static param, workq init path */
	TH_OPTION_MAINTHREAD = 0x08,  /* task main thread: immovable port, uninterruptible wait */
});
1288
/*
 * main_thread_set_immovable_pinned:
 *
 * Make the main thread's control port immovable and pinned;
 * thin wrapper over the IPC layer implementation.
 */
void
main_thread_set_immovable_pinned(thread_t thread)
{
	ipc_main_thread_set_immovable_pinned(thread);
}
1294
1295 /*
1296 * Create a new thread.
1297 * Doesn't start the thread running.
1298 *
1299 * Task and tasks_threads_lock are returned locked on success.
1300 */
1301 static kern_return_t
thread_create_internal(task_t parent_task,integer_t priority,thread_continue_t continuation,void * parameter,thread_create_internal_options_t options,thread_t * out_thread)1302 thread_create_internal(
1303 task_t parent_task,
1304 integer_t priority,
1305 thread_continue_t continuation,
1306 void *parameter,
1307 thread_create_internal_options_t options,
1308 thread_t *out_thread)
1309 {
1310 thread_t new_thread;
1311 ipc_thread_init_options_t init_options = IPC_THREAD_INIT_NONE;
1312 struct thread_ro tro_tpl = { };
1313 bool first_thread = false;
1314 kern_return_t kr = KERN_FAILURE;
1315
1316 /*
1317 * Allocate a thread and initialize static fields
1318 */
1319 new_thread = zalloc_flags(thread_zone, Z_WAITOK | Z_NOFAIL);
1320
1321 if (__improbable(current_thread() == &init_thread)) {
1322 /*
1323 * The first thread ever is a global, but because we want to be
1324 * able to zone_id_require() threads, we have to stop using the
1325 * global piece of memory we used to boostrap the kernel and
1326 * jump to a proper thread from a zone.
1327 *
1328 * This is why that one thread will inherit its original
1329 * state differently.
1330 *
1331 * Also remember this thread in `vm_pageout_scan_thread`
1332 * as this is what the first thread ever becomes.
1333 *
1334 * Also pre-warm the depress timer since the VM pageout scan
1335 * daemon might need to use it.
1336 */
1337 assert(vm_pageout_scan_thread == THREAD_NULL);
1338 vm_pageout_scan_thread = new_thread;
1339
1340 first_thread = true;
1341 #pragma clang diagnostic push
1342 #pragma clang diagnostic ignored "-Wnontrivial-memaccess"
1343 /* work around 74481146 */
1344 memcpy(new_thread, &init_thread, sizeof(*new_thread));
1345 #pragma clang diagnostic pop
1346
1347 /*
1348 * Make the ctid table functional
1349 */
1350 ctid_table_init();
1351 new_thread->ctid = 0;
1352 } else {
1353 init_thread_from_template(new_thread);
1354 }
1355
1356 if (options & TH_OPTION_MAINTHREAD) {
1357 init_options |= IPC_THREAD_INIT_MAINTHREAD;
1358 }
1359
1360 os_ref_init_count_raw(&new_thread->ref_count, &thread_refgrp, 2);
1361 machine_thread_create(new_thread, parent_task, first_thread);
1362
1363 machine_thread_process_signature(new_thread, parent_task);
1364
1365 #ifdef MACH_BSD
1366 uthread_init(parent_task, get_bsdthread_info(new_thread),
1367 &tro_tpl, (options & TH_OPTION_WORKQ) != 0);
1368 if (!is_corpsetask(parent_task)) {
1369 /*
1370 * uthread_init will set tro_cred (with a +1)
1371 * and tro_proc for live tasks.
1372 */
1373 assert(tro_tpl.tro_cred && tro_tpl.tro_proc);
1374 }
1375 #endif /* MACH_BSD */
1376
1377 thread_lock_init(new_thread);
1378 wake_lock_init(new_thread);
1379
1380 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, LCK_ATTR_NULL);
1381
1382 ipc_thread_init(parent_task, new_thread, &tro_tpl, init_options);
1383
1384 thread_ro_create(parent_task, new_thread, &tro_tpl);
1385
1386 new_thread->continuation = continuation;
1387 new_thread->parameter = parameter;
1388 new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
1389 new_thread->requested_policy = default_thread_requested_policy;
1390 priority_queue_init(&new_thread->sched_inheritor_queue);
1391 priority_queue_init(&new_thread->base_inheritor_queue);
1392 #if CONFIG_SCHED_CLUTCH
1393 priority_queue_entry_init(&new_thread->th_clutch_runq_link);
1394 priority_queue_entry_init(&new_thread->th_clutch_pri_link);
1395 #endif /* CONFIG_SCHED_CLUTCH */
1396
1397 #if CONFIG_SCHED_EDGE
1398 new_thread->th_bound_cluster_enqueued = false;
1399 for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
1400 new_thread->th_shared_rsrc_enqueued[shared_rsrc_type] = false;
1401 new_thread->th_shared_rsrc_heavy_user[shared_rsrc_type] = false;
1402 new_thread->th_shared_rsrc_heavy_perf_control[shared_rsrc_type] = false;
1403 }
1404 #endif /* CONFIG_SCHED_EDGE */
1405 new_thread->th_bound_cluster_id = THREAD_BOUND_CLUSTER_NONE;
1406
1407 /* Allocate I/O Statistics structure */
1408 new_thread->thread_io_stats = kalloc_data(sizeof(struct io_stat_info),
1409 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1410
1411 #if KASAN_CLASSIC
1412 kasan_init_thread(&new_thread->kasan_data);
1413 #endif /* KASAN_CLASSIC */
1414
1415 #if CONFIG_KCOV
1416 kcov_init_thread(&new_thread->kcov_data);
1417 #endif
1418
1419 #if CONFIG_IOSCHED
1420 /* Clear out the I/O Scheduling info for AppleFSCompression */
1421 new_thread->decmp_upl = NULL;
1422 #endif /* CONFIG_IOSCHED */
1423
1424 new_thread->thread_region_page_shift = 0;
1425
1426 #if DEVELOPMENT || DEBUG
1427 task_lock(parent_task);
1428 uint16_t thread_limit = parent_task->task_thread_limit;
1429 if (exc_resource_threads_enabled &&
1430 thread_limit > 0 &&
1431 parent_task->thread_count >= thread_limit &&
1432 !parent_task->task_has_crossed_thread_limit &&
1433 !(parent_task->t_flags & TF_CORPSE)) {
1434 int thread_count = parent_task->thread_count;
1435 parent_task->task_has_crossed_thread_limit = TRUE;
1436 task_unlock(parent_task);
1437 SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
1438 } else {
1439 task_unlock(parent_task);
1440 }
1441 #endif
1442
1443 lck_mtx_lock(&tasks_threads_lock);
1444 task_lock(parent_task);
1445
1446 /*
1447 * Fail thread creation if parent task is being torn down or has too many threads
1448 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
1449 */
1450 if (parent_task->active == 0 || parent_task->halting ||
1451 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1452 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1453 task_unlock(parent_task);
1454 lck_mtx_unlock(&tasks_threads_lock);
1455
1456 ipc_thread_disable(new_thread);
1457 ipc_thread_terminate(new_thread);
1458 kfree_data(new_thread->thread_io_stats,
1459 sizeof(struct io_stat_info));
1460 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
1461 kr = KERN_FAILURE;
1462 goto out_thread_cleanup;
1463 }
1464
1465 /* Protected by the tasks_threads_lock */
1466 new_thread->thread_id = ++thread_unique_id;
1467
1468 ctid_table_add(new_thread);
1469
1470 /* New threads inherit any default state on the task */
1471 machine_thread_inherit_taskwide(new_thread, parent_task);
1472
1473 task_reference_grp(parent_task, TASK_GRP_INTERNAL);
1474
1475 if (parent_task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1476 /*
1477 * This task has a per-thread CPU limit; make sure this new thread
1478 * gets its limit set too, before it gets out of the kernel.
1479 */
1480 act_set_astledger(new_thread);
1481 }
1482
1483 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1484 if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
1485 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1486 ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
1487 }
1488
1489 new_thread->t_bankledger = LEDGER_NULL;
1490 new_thread->t_deduct_bank_ledger_time = 0;
1491 new_thread->t_deduct_bank_ledger_energy = 0;
1492
1493 new_thread->t_ledger = parent_task->ledger;
1494 if (new_thread->t_ledger) {
1495 ledger_reference(new_thread->t_ledger);
1496 }
1497
1498 recount_thread_init(&new_thread->th_recount);
1499
1500 #if defined(CONFIG_SCHED_MULTIQ)
1501 /* Cache the task's sched_group */
1502 new_thread->sched_group = parent_task->sched_group;
1503 #endif /* defined(CONFIG_SCHED_MULTIQ) */
1504
1505 /* Cache the task's map */
1506 new_thread->map = parent_task->map;
1507
1508 new_thread->depress_timer = timer_call_alloc(thread_depress_expire, new_thread);
1509 new_thread->wait_timer = timer_call_alloc(thread_timer_expire, new_thread);
1510
1511 #if KPC
1512 kpc_thread_create(new_thread);
1513 #endif
1514
1515 /* Set the thread's scheduling parameters */
1516 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1517 new_thread->max_priority = parent_task->max_priority;
1518 new_thread->task_priority = parent_task->priority;
1519
1520 #if CONFIG_THREAD_GROUPS
1521 thread_group_init_thread(new_thread, parent_task);
1522 #endif /* CONFIG_THREAD_GROUPS */
1523
1524 int new_priority = (priority < 0) ? parent_task->priority: priority;
1525 new_priority = (priority < 0)? parent_task->priority: priority;
1526 if (new_priority > new_thread->max_priority) {
1527 new_priority = new_thread->max_priority;
1528 }
1529 #if !defined(XNU_TARGET_OS_OSX)
1530 if (new_priority < MAXPRI_THROTTLE) {
1531 new_priority = MAXPRI_THROTTLE;
1532 }
1533 #endif /* !defined(XNU_TARGET_OS_OSX) */
1534
1535 new_thread->importance = new_priority - new_thread->task_priority;
1536
1537 sched_set_thread_base_priority(new_thread, new_priority);
1538
1539 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1540 new_thread->sched_stamp = sched_tick;
1541 #if CONFIG_SCHED_CLUTCH
1542 new_thread->pri_shift = sched_clutch_thread_pri_shift(new_thread, new_thread->th_sched_bucket);
1543 #else /* CONFIG_SCHED_CLUTCH */
1544 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1545 #endif /* CONFIG_SCHED_CLUTCH */
1546 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1547
1548 if (parent_task->max_priority <= MAXPRI_THROTTLE) {
1549 sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
1550 }
1551
1552 thread_policy_create(new_thread);
1553
1554 /* Chain the thread onto the task's list */
1555 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1556 parent_task->thread_count++;
1557
1558 /* So terminating threads don't need to take the task lock to decrement */
1559 os_atomic_inc(&parent_task->active_thread_count, relaxed);
1560
1561 queue_enter(&threads, new_thread, thread_t, threads);
1562 threads_count++;
1563
1564 new_thread->active = TRUE;
1565 if (task_is_a_corpse_fork(parent_task)) {
1566 /* Set the inspection bit if the task is a corpse fork */
1567 new_thread->inspection = TRUE;
1568 } else {
1569 new_thread->inspection = FALSE;
1570 }
1571 new_thread->corpse_dup = FALSE;
1572 new_thread->turnstile = turnstile_alloc();
1573 new_thread->ctsid = turnstile_compact_id_get();
1574
1575
1576 *out_thread = new_thread;
1577
1578 if (kdebug_enable) {
1579 long args[4] = {};
1580
1581 kdbg_trace_data(get_bsdtask_info(parent_task), &args[1], &args[3]);
1582
1583 /*
1584 * Starting with 26604425, exec'ing creates a new task/thread.
1585 *
1586 * NEWTHREAD in the current process has two possible meanings:
1587 *
1588 * 1) Create a new thread for this process.
1589 * 2) Create a new thread for the future process this will become in an
1590 * exec.
1591 *
1592 * To disambiguate these, arg3 will be set to TRUE for case #2.
1593 *
1594 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1595 * task exec'ing. The read of t_procflags does not take the proc_lock.
1596 */
1597 args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
1598
1599 KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
1600 args[1], args[2], args[3]);
1601
1602 kdebug_proc_name_args(get_bsdtask_info(parent_task), args);
1603 KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
1604 args[3]);
1605 }
1606
1607 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1608
1609 kr = KERN_SUCCESS;
1610 goto done;
1611
1612 out_thread_cleanup:
1613 #ifdef MACH_BSD
1614 {
1615 struct uthread *ut = get_bsdthread_info(new_thread);
1616
1617 uthread_cleanup(ut, &tro_tpl);
1618 uthread_destroy(ut);
1619 }
1620 #endif /* MACH_BSD */
1621
1622 machine_thread_destroy(new_thread);
1623
1624 thread_ro_destroy(new_thread);
1625 zfree(thread_zone, new_thread);
1626
1627 done:
1628 return kr;
1629 }
1630
/*
 * thread_create_with_options_internal:
 *
 * Common path for creating a (suspended) user thread in `task`.
 * On success the thread is left with user_stop_count == 1 (held),
 * so the caller/userspace must resume it before it runs.
 * `from_user` gates the MAC remote-thread check and extmod statistics.
 */
static kern_return_t
thread_create_with_options_internal(
	task_t                  task,
	thread_t                *new_thread,
	boolean_t               from_user,
	thread_create_internal_options_t options,
	thread_continue_t       continuation)
{
	kern_return_t result;
	thread_t thread;

	/* Kernel-task threads go through kernel_thread_create() instead. */
	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

#if CONFIG_MACF
	if (from_user && current_task() != task &&
	    mac_proc_check_remote_thread_create(task, -1, NULL, 0) != 0) {
		return KERN_DENIED;
	}
#endif

	/* On success, returns with task and tasks_threads_lock held. */
	result = thread_create_internal(task, -1, continuation, NULL, options, &thread);
	if (result != KERN_SUCCESS) {
		return result;
	}

	/* New user threads start suspended (one user-level stop). */
	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0) {
		/* Additional hold so the thread tracks the task's suspension. */
		thread_hold(thread);
	}

	if (from_user) {
		extmod_statistics_incr_thread_create(task);
	}

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return KERN_SUCCESS;
}
1675
/*
 * thread_create_immovable:
 *
 * In-kernel thread creation (no MAC check, no extmod accounting);
 * the new thread starts suspended at thread_bootstrap_return.
 */
kern_return_t
thread_create_immovable(
	task_t                          task,
	thread_t                        *new_thread)
{
	return thread_create_with_options_internal(task, new_thread, FALSE,
	           TH_OPTION_NONE, (thread_continue_t)thread_bootstrap_return);
}
1684
/*
 * thread_create_from_user:
 *
 * MIG entry point for user-requested thread creation; subject to the
 * MAC remote-thread-create check and counted in extmod statistics.
 */
kern_return_t
thread_create_from_user(
	task_t                          task,
	thread_t                        *new_thread)
{
	/* All thread ports are created immovable by default */
	return thread_create_with_options_internal(task, new_thread, TRUE, TH_OPTION_NONE,
	           (thread_continue_t)thread_bootstrap_return);
}
1694
/*
 * thread_create_with_continuation:
 *
 * In-kernel thread creation with a caller-supplied entry continuation;
 * the thread starts suspended like other created user-task threads.
 */
kern_return_t
thread_create_with_continuation(
	task_t                          task,
	thread_t                        *new_thread,
	thread_continue_t               continuation)
{
	return thread_create_with_options_internal(task, new_thread, FALSE, TH_OPTION_NONE, continuation);
}
1703
1704 /*
1705 * Create a thread that is already started, but is waiting on an event
1706 */
1707 static kern_return_t
thread_create_waiting_internal(task_t task,thread_continue_t continuation,event_t event,block_hint_t block_hint,thread_create_internal_options_t options,thread_t * new_thread)1708 thread_create_waiting_internal(
1709 task_t task,
1710 thread_continue_t continuation,
1711 event_t event,
1712 block_hint_t block_hint,
1713 thread_create_internal_options_t options,
1714 thread_t *new_thread)
1715 {
1716 kern_return_t result;
1717 thread_t thread;
1718 wait_interrupt_t wait_interrupt = THREAD_INTERRUPTIBLE;
1719
1720 if (task == TASK_NULL || task == kernel_task) {
1721 return KERN_INVALID_ARGUMENT;
1722 }
1723
1724 result = thread_create_internal(task, -1, continuation, NULL,
1725 options, &thread);
1726 if (result != KERN_SUCCESS) {
1727 return result;
1728 }
1729
1730 /* note no user_stop_count or thread_hold here */
1731
1732 if (task->suspend_count > 0) {
1733 thread_hold(thread);
1734 }
1735
1736 thread_mtx_lock(thread);
1737 thread_set_pending_block_hint(thread, block_hint);
1738 if (options & TH_OPTION_WORKQ) {
1739 thread->static_param = true;
1740 event = workq_thread_init_and_wq_lock(task, thread);
1741 } else if (options & TH_OPTION_MAINTHREAD) {
1742 wait_interrupt = THREAD_UNINT;
1743 }
1744 thread_start_in_assert_wait(thread, event, wait_interrupt);
1745 thread_mtx_unlock(thread);
1746
1747 task_unlock(task);
1748 lck_mtx_unlock(&tasks_threads_lock);
1749
1750 *new_thread = thread;
1751
1752 return KERN_SUCCESS;
1753 }
1754
/*
 * main_thread_create_waiting:
 *
 * Create a task's main thread, started but parked in an
 * uninterruptible wait on `event` (TH_OPTION_MAINTHREAD).
 */
kern_return_t
main_thread_create_waiting(
	task_t                  task,
	thread_continue_t       continuation,
	event_t                 event,
	thread_t                *new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
	           kThreadWaitNone, TH_OPTION_MAINTHREAD, new_thread);
}
1765
1766
/*
 * thread_create_running_internal2:
 *
 * Create a thread in `task`, seed its machine state from
 * `new_state`/`flavor`, and start it running immediately.
 * `from_user` gates the MAC check, user->kernel state conversion,
 * and extmod statistics.
 */
static kern_return_t
thread_create_running_internal2(
	task_t                  task,
	int                     flavor,
	thread_state_t          new_state,
	mach_msg_type_number_t  new_state_count,
	thread_t                *new_thread,
	boolean_t               from_user)
{
	kern_return_t result;
	thread_t thread;

	if (task == TASK_NULL || task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

#if CONFIG_MACF
	if (from_user && current_task() != task &&
	    mac_proc_check_remote_thread_create(task, flavor, new_state, new_state_count) != 0) {
		return KERN_DENIED;
	}
#endif

	/* On success, returns with task and tasks_threads_lock held. */
	result = thread_create_internal(task, -1,
	    (thread_continue_t)thread_bootstrap_return, NULL,
	    TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS) {
		return result;
	}

	if (task->suspend_count > 0) {
		thread_hold(thread);
	}

	if (from_user) {
		/* User-provided state must be converted before being applied. */
		result = machine_thread_state_convert_from_user(thread, flavor,
		    new_state, new_state_count, NULL, 0, TSSF_FLAGS_NONE);
	}
	/* result is still KERN_SUCCESS here when !from_user. */
	if (result == KERN_SUCCESS) {
		result = machine_thread_set_state(thread, flavor, new_state,
		    new_state_count);
	}
	if (result != KERN_SUCCESS) {
		task_unlock(task);
		lck_mtx_unlock(&tasks_threads_lock);

		/* Undo the creation: terminate and drop the caller's reference. */
		thread_terminate(thread);
		thread_deallocate(thread);
		return result;
	}

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	if (from_user) {
		extmod_statistics_incr_thread_create(task);
	}

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return result;
}
1833
1834 /* Prototype, see justification above */
1835 kern_return_t
1836 thread_create_running(
1837 task_t task,
1838 int flavor,
1839 thread_state_t new_state,
1840 mach_msg_type_number_t new_state_count,
1841 thread_t *new_thread);
1842
/*
 * thread_create_running:
 *
 * Kernel-internal variant: no MAC check or user state conversion.
 */
kern_return_t
thread_create_running(
	task_t                  task,
	int                     flavor,
	thread_state_t          new_state,
	mach_msg_type_number_t  new_state_count,
	thread_t                *new_thread)
{
	return thread_create_running_internal2(
		task, flavor, new_state, new_state_count,
		new_thread, FALSE);
}
1855
/*
 * thread_create_running_from_user:
 *
 * MIG entry point: applies the MAC remote-thread check and converts
 * the user-supplied thread state before starting the thread.
 */
kern_return_t
thread_create_running_from_user(
	task_t                  task,
	int                     flavor,
	thread_state_t          new_state,
	mach_msg_type_number_t  new_state_count,
	thread_t                *new_thread)
{
	return thread_create_running_internal2(
		task, flavor, new_state, new_state_count,
		new_thread, TRUE);
}
1868
/*
 * thread_create_workq_waiting:
 *
 * Create a workqueue thread, started but parked waiting on the
 * workqueue-chosen event (see thread_create_waiting_internal).
 */
kern_return_t
thread_create_workq_waiting(
	task_t              task,
	thread_continue_t   continuation,
	thread_t            *new_thread)
{
	/*
	 * Create thread, but don't pin control port just yet, in case someone calls
	 * task_threads() and deallocates pinned port before kernel copyout happens,
	 * which will result in pinned port guard exception. Instead, pin and copyout
	 * atomically during workq_setup_and_run().
	 */
	int options = TH_OPTION_NOSUSP | TH_OPTION_WORKQ;
	return thread_create_waiting_internal(task, continuation, NULL,
	           kThreadWaitParkedWorkQueue, options, new_thread);
}
1885
1886 /*
1887 * kernel_thread_create:
1888 *
1889 * Create a thread in the kernel task
1890 * to execute in kernel context.
1891 */
1892 kern_return_t
kernel_thread_create(thread_continue_t continuation,void * parameter,integer_t priority,thread_t * new_thread)1893 kernel_thread_create(
1894 thread_continue_t continuation,
1895 void *parameter,
1896 integer_t priority,
1897 thread_t *new_thread)
1898 {
1899 kern_return_t result;
1900 thread_t thread;
1901 task_t task = kernel_task;
1902
1903 result = thread_create_internal(task, priority, continuation, parameter,
1904 TH_OPTION_NONE, &thread);
1905 if (result != KERN_SUCCESS) {
1906 return result;
1907 }
1908
1909 task_unlock(task);
1910 lck_mtx_unlock(&tasks_threads_lock);
1911
1912 stack_alloc(thread);
1913 assert(thread->kernel_stack != 0);
1914 #if !defined(XNU_TARGET_OS_OSX)
1915 if (priority > BASEPRI_KERNEL)
1916 #endif
1917 thread->reserved_stack = thread->kernel_stack;
1918
1919 if (debug_task & 1) {
1920 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1921 }
1922 *new_thread = thread;
1923
1924 return result;
1925 }
1926
1927 kern_return_t
kernel_thread_start_priority(thread_continue_t continuation,void * parameter,integer_t priority,thread_t * new_thread)1928 kernel_thread_start_priority(
1929 thread_continue_t continuation,
1930 void *parameter,
1931 integer_t priority,
1932 thread_t *new_thread)
1933 {
1934 kern_return_t result;
1935 thread_t thread;
1936
1937 result = kernel_thread_create(continuation, parameter, priority, &thread);
1938 if (result != KERN_SUCCESS) {
1939 return result;
1940 }
1941
1942 *new_thread = thread;
1943
1944 thread_mtx_lock(thread);
1945 thread_start(thread);
1946 thread_mtx_unlock(thread);
1947
1948 return result;
1949 }
1950
/*
 * kernel_thread_start:
 *
 * Create and start a kernel-task thread at the default priority
 * (-1 inherits the kernel task's priority).
 */
kern_return_t
kernel_thread_start(
	thread_continue_t       continuation,
	void                    *parameter,
	thread_t                *new_thread)
{
	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
}
1959
/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
/* it is assumed that the thread is locked by the caller */
static void
retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
{
	int state, flags;

	/* fill in info */

	thread_read_times(thread, &basic_info->user_time,
	    &basic_info->system_time, NULL);

	/*
	 * Update lazy-evaluated scheduler info because someone wants it.
	 */
	if (SCHED(can_update_priority)(thread)) {
		SCHED(update_priority)(thread);
	}

	/* sleep_time is not tracked; always reported as 0. */
	basic_info->sleep_time = 0;

	/*
	 * To calculate cpu_usage, first correct for timer rate,
	 * then for 5/8 ageing. The correction factor [3/5] is
	 * (1/(5/8) - 1).
	 */
	basic_info->cpu_usage = 0;
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	if (sched_tick_interval) {
		basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
		    * TH_USAGE_SCALE) / sched_tick_interval);
		basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
	}
#endif

	/* Clamp to 100% in TH_USAGE_SCALE units. */
	if (basic_info->cpu_usage > TH_USAGE_SCALE) {
		basic_info->cpu_usage = TH_USAGE_SCALE;
	}

	basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
	    POLICY_TIMESHARE: POLICY_RR);

	flags = 0;
	if (thread->options & TH_OPT_IDLE_THREAD) {
		flags |= TH_FLAGS_IDLE;
	}

	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
		flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
	}

	/* No kernel stack means the thread is swapped out. */
	if (!thread->kernel_stack) {
		flags |= TH_FLAGS_SWAPPED;
	}

	/*
	 * Map the state bits to a single run_state; the order of these
	 * checks defines the precedence (TERMINATE > RUN > UNINT > SUSP > WAIT).
	 */
	state = 0;
	if (thread->state & TH_TERMINATE) {
		state = TH_STATE_HALTED;
	} else if (thread->state & TH_RUN) {
		state = TH_STATE_RUNNING;
	} else if (thread->state & TH_UNINT) {
		state = TH_STATE_UNINTERRUPTIBLE;
	} else if (thread->state & TH_SUSP) {
		state = TH_STATE_STOPPED;
	} else if (thread->state & TH_WAIT) {
		state = TH_STATE_WAITING;
	}

	basic_info->run_state = state;
	basic_info->flags = flags;

	basic_info->suspend_count = thread->user_stop_count;

	return;
}
2035
/*
 * thread_info_internal:
 *
 * Return information about a thread, in the style selected by 'flavor'.
 *
 *	thread:			thread to query; THREAD_NULL is rejected.
 *	flavor:			which info structure to fill in.
 *	thread_info_out:	caller-supplied OUT array.
 *	thread_info_count:	IN: capacity of the OUT array (natural_t units);
 *				OUT: number of units actually written.
 *
 * Most flavors snapshot the thread under the thread lock at splsched so
 * the reported fields are mutually consistent.
 */
kern_return_t
thread_info_internal(
	thread_t                thread,
	thread_flavor_t         flavor,
	thread_info_t           thread_info_out, /* ptr to OUT array */
	mach_msg_type_number_t  *thread_info_count) /*IN/OUT*/
{
	spl_t   s;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (flavor == THREAD_BASIC_INFO) {
		if (*thread_info_count < THREAD_BASIC_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		/* Snapshot under the thread lock for a consistent view. */
		s = splsched();
		thread_lock(thread);

		retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_BASIC_INFO_COUNT;

		return KERN_SUCCESS;
	} else if (flavor == THREAD_IDENTIFIER_INFO) {
		thread_identifier_info_t identifier_info;

		if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		identifier_info = __IGNORE_WCASTALIGN((thread_identifier_info_t)thread_info_out);

		s = splsched();
		thread_lock(thread);

		identifier_info->thread_id = thread->thread_id;
		identifier_info->thread_handle = thread->machine.cthread_self;
		identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);

		thread_unlock(thread);
		splx(s);
		return KERN_SUCCESS;
	} else if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
		policy_timeshare_info_t ts_info;

		if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		ts_info = (policy_timeshare_info_t)thread_info_out;

		s = splsched();
		thread_lock(thread);

		/* This flavor only applies to threads in timeshare mode. */
		if (thread->sched_mode != TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);
			return KERN_INVALID_POLICY;
		}

		/* While depressed, report DEPRESSPRI and expose the real base
		 * priority via depress_priority. */
		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (ts_info->depressed) {
			ts_info->base_priority = DEPRESSPRI;
			ts_info->depress_priority = thread->base_pri;
		} else {
			ts_info->base_priority = thread->base_pri;
			ts_info->depress_priority = -1;
		}

		ts_info->cur_priority = thread->sched_pri;
		ts_info->max_priority = thread->max_priority;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;

		return KERN_SUCCESS;
	} else if (flavor == THREAD_SCHED_FIFO_INFO) {
		/* FIFO scheduling info is not supported; still validate the count. */
		if (*thread_info_count < POLICY_FIFO_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		return KERN_INVALID_POLICY;
	} else if (flavor == THREAD_SCHED_RR_INFO) {
		policy_rr_info_t rr_info;
		uint32_t quantum_time;
		uint64_t quantum_ns;

		if (*thread_info_count < POLICY_RR_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		rr_info = (policy_rr_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		/* Round-robin info only applies to non-timeshare threads. */
		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return KERN_INVALID_POLICY;
		}

		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (rr_info->depressed) {
			rr_info->base_priority = DEPRESSPRI;
			rr_info->depress_priority = thread->base_pri;
		} else {
			rr_info->base_priority = thread->base_pri;
			rr_info->depress_priority = -1;
		}

		/* Report the scheduler's initial quantum, converted to ms below. */
		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_info->max_priority = thread->max_priority;
		rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_RR_INFO_COUNT;

		return KERN_SUCCESS;
	} else if (flavor == THREAD_EXTENDED_INFO) {
		thread_basic_info_data_t basic_info;
		thread_extended_info_t extended_info = __IGNORE_WCASTALIGN((thread_extended_info_t)thread_info_out);

		if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
			return KERN_INVALID_ARGUMENT;
		}

		s = splsched();
		thread_lock(thread);

		/* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
		 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
		 */
		retrieve_thread_basic_info(thread, &basic_info);
		/* Convert (seconds, microseconds) pairs to nanoseconds. */
		extended_info->pth_user_time = (((uint64_t)basic_info.user_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.user_time.microseconds * NSEC_PER_USEC));
		extended_info->pth_system_time = (((uint64_t)basic_info.system_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.system_time.microseconds * NSEC_PER_USEC));

		extended_info->pth_cpu_usage = basic_info.cpu_usage;
		extended_info->pth_policy = basic_info.policy;
		extended_info->pth_run_state = basic_info.run_state;
		extended_info->pth_flags = basic_info.flags;
		extended_info->pth_sleep_time = basic_info.sleep_time;
		extended_info->pth_curpri = thread->sched_pri;
		extended_info->pth_priority = thread->base_pri;
		extended_info->pth_maxpriority = thread->max_priority;

		bsd_getthreadname(get_bsdthread_info(thread), extended_info->pth_name);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_EXTENDED_INFO_COUNT;

		return KERN_SUCCESS;
	} else if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
#if DEVELOPMENT || DEBUG
		thread_debug_info_internal_t dbg_info;
		if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT) {
			return KERN_NOT_SUPPORTED;
		}

		if (thread_info_out == NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		dbg_info = __IGNORE_WCASTALIGN((thread_debug_info_internal_t)thread_info_out);
		dbg_info->page_creation_count = thread->t_page_creation_count;

		*thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
		return KERN_SUCCESS;
#endif /* DEVELOPMENT || DEBUG */
		/* Debug-only flavor on a release kernel. */
		return KERN_NOT_SUPPORTED;
	}

	return KERN_INVALID_ARGUMENT;
}
2225
2226 static void
_convert_mach_to_time_value(uint64_t time_mach,time_value_t * time)2227 _convert_mach_to_time_value(uint64_t time_mach, time_value_t *time)
2228 {
2229 clock_sec_t secs;
2230 clock_usec_t usecs;
2231 absolutetime_to_microtime(time_mach, &secs, &usecs);
2232 time->seconds = (typeof(time->seconds))secs;
2233 time->microseconds = usecs;
2234 }
2235
2236 void
thread_read_times(thread_t thread,time_value_t * user_time,time_value_t * system_time,time_value_t * runnable_time)2237 thread_read_times(
2238 thread_t thread,
2239 time_value_t *user_time,
2240 time_value_t *system_time,
2241 time_value_t *runnable_time)
2242 {
2243 if (user_time && system_time) {
2244 struct recount_times_mach times = recount_thread_times(thread);
2245 _convert_mach_to_time_value(times.rtm_user, user_time);
2246 _convert_mach_to_time_value(times.rtm_system, system_time);
2247 }
2248
2249 if (runnable_time) {
2250 uint64_t runnable_time_mach = timer_grab(&thread->runnable_timer);
2251 _convert_mach_to_time_value(runnable_time_mach, runnable_time);
2252 }
2253 }
2254
2255 uint64_t
thread_get_runtime_self(void)2256 thread_get_runtime_self(void)
2257 {
2258 /*
2259 * Must be guaranteed to stay on the same CPU and not be updated by the
2260 * scheduler.
2261 */
2262 boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
2263 uint64_t time_mach = recount_current_thread_time_mach();
2264 ml_set_interrupts_enabled(interrupt_state);
2265 return time_mach;
2266 }
2267
2268 /*
2269 * thread_wire_internal:
2270 *
2271 * Specify that the target thread must always be able
2272 * to run and to allocate memory.
2273 */
2274 kern_return_t
thread_wire_internal(host_priv_t host_priv,thread_t thread,boolean_t wired,boolean_t * prev_state)2275 thread_wire_internal(
2276 host_priv_t host_priv,
2277 thread_t thread,
2278 boolean_t wired,
2279 boolean_t *prev_state)
2280 {
2281 if (host_priv == NULL || thread != current_thread()) {
2282 return KERN_INVALID_ARGUMENT;
2283 }
2284
2285 if (prev_state) {
2286 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2287 }
2288
2289 if (wired) {
2290 if (!(thread->options & TH_OPT_VMPRIV)) {
2291 vm_page_free_reserve(1); /* XXX */
2292 }
2293 thread->options |= TH_OPT_VMPRIV;
2294 } else {
2295 if (thread->options & TH_OPT_VMPRIV) {
2296 vm_page_free_reserve(-1); /* XXX */
2297 }
2298 thread->options &= ~TH_OPT_VMPRIV;
2299 }
2300
2301 return KERN_SUCCESS;
2302 }
2303
2304
2305 /*
2306 * thread_wire:
2307 *
2308 * User-api wrapper for thread_wire_internal()
2309 */
2310 kern_return_t
thread_wire(host_priv_t host_priv,thread_t thread,boolean_t wired)2311 thread_wire(
2312 host_priv_t host_priv,
2313 thread_t thread,
2314 boolean_t wired)
2315 {
2316 return thread_wire_internal(host_priv, thread, wired, NULL);
2317 }
2318
2319 boolean_t
is_external_pageout_thread(void)2320 is_external_pageout_thread(void)
2321 {
2322 return current_thread() == vm_pageout_state.vm_pageout_external_iothread;
2323 }
2324
2325 boolean_t
is_vm_privileged(void)2326 is_vm_privileged(void)
2327 {
2328 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2329 }
2330
2331 boolean_t
set_vm_privilege(boolean_t privileged)2332 set_vm_privilege(boolean_t privileged)
2333 {
2334 boolean_t was_vmpriv;
2335
2336 if (current_thread()->options & TH_OPT_VMPRIV) {
2337 was_vmpriv = TRUE;
2338 } else {
2339 was_vmpriv = FALSE;
2340 }
2341
2342 if (privileged != FALSE) {
2343 current_thread()->options |= TH_OPT_VMPRIV;
2344 } else {
2345 current_thread()->options &= ~TH_OPT_VMPRIV;
2346 }
2347
2348 return was_vmpriv;
2349 }
2350
2351 void
thread_floor_boost_set_promotion_locked(thread_t thread)2352 thread_floor_boost_set_promotion_locked(thread_t thread)
2353 {
2354 assert(thread->priority_floor_count > 0);
2355
2356 if (!(thread->sched_flags & TH_SFLAG_FLOOR_PROMOTED)) {
2357 sched_thread_promote_reason(thread, TH_SFLAG_FLOOR_PROMOTED, 0);
2358 }
2359 }
2360
/*! @function thread_priority_floor_start
 *  @abstract boost the current thread priority to floor.
 *  @discussion Increase the priority of the current thread to at least MINPRI_FLOOR.
 *  The boost will be maintained until a corresponding thread_priority_floor_end()
 *  is called. Every call of thread_priority_floor_start() needs to have a corresponding
 *  call to thread_priority_floor_end() from the same thread.
 *  No thread can return to userspace before calling thread_priority_floor_end().
 *
 *  NOTE: avoid using this function. Try to use gate_t or sleep_with_inheritor()
 *  instead.
 *  @result a token to be given to the corresponding thread_priority_floor_end()
 */
2373 thread_pri_floor_t
thread_priority_floor_start(void)2374 thread_priority_floor_start(void)
2375 {
2376 thread_pri_floor_t ret;
2377 thread_t thread = current_thread();
2378 __assert_only uint16_t prev_priority_floor_count;
2379
2380 assert(thread->priority_floor_count < UINT16_MAX);
2381 prev_priority_floor_count = thread->priority_floor_count++;
2382 #if MACH_ASSERT
2383 /*
2384 * Set the ast to check that the
2385 * priority_floor_count is going to be set to zero when
2386 * going back to userspace.
2387 * Set it only once when we increment it for the first time.
2388 */
2389 if (prev_priority_floor_count == 0) {
2390 act_set_debug_assert();
2391 }
2392 #endif
2393
2394 ret.thread = thread;
2395 return ret;
2396 }
2397
2398 /*! @function thread_priority_floor_end
2399 * @abstract ends the floor boost.
2400 * @param token the token obtained from thread_priority_floor_start()
2401 * @discussion ends the priority floor boost started with thread_priority_floor_start()
2402 */
void
thread_priority_floor_end(thread_pri_floor_t *token)
{
	thread_t thread = current_thread();

	/* There must be an outstanding thread_priority_floor_start(). */
	assert(thread->priority_floor_count > 0);
	/* The token must come from a start() on this same thread. */
	assertf(token->thread == thread, "thread_priority_floor_end called from a different thread from thread_priority_floor_start %p %p", thread, token->thread);

	/*
	 * Drop one floor request.  When the last one goes away and a floor
	 * promotion was applied, undo it.  The flag is re-checked under the
	 * thread lock because it can change between the unlocked test and
	 * lock acquisition.
	 */
	if ((thread->priority_floor_count-- == 1) && (thread->sched_flags & TH_SFLAG_FLOOR_PROMOTED)) {
		spl_t s = splsched();
		thread_lock(thread);

		if (thread->sched_flags & TH_SFLAG_FLOOR_PROMOTED) {
			sched_thread_unpromote_reason(thread, TH_SFLAG_FLOOR_PROMOTED, 0);
		}

		thread_unlock(thread);
		splx(s);
	}

	/* Invalidate the token so accidental reuse is detectable. */
	token->thread = NULL;
}
2425
2426 /*
2427 * XXX assuming current thread only, for now...
2428 */
/*
 * thread_guard_violation:
 *
 * Record a guard violation on the current thread and arm the AST_GUARD
 * AST so the exception is raised on the way back to userspace.
 *
 *	code / subcode:	Mach exception codes describing the violation.
 *	fatal:		a pending non-fatal exception may be overwritten
 *			by a fatal one, but never the reverse.
 */
void
thread_guard_violation(thread_t thread,
    mach_exception_data_type_t code, mach_exception_data_type_t subcode, boolean_t fatal)
{
	assert(thread == current_thread());

	/* Don't set up the AST for kernel threads; this check is needed to ensure
	 * that the guard_exc_* fields in the thread structure are set only by the
	 * current thread and therefore, don't require a lock.
	 */
	if (get_threadtask(thread) == kernel_task) {
		return;
	}

	assert(EXC_GUARD_DECODE_GUARD_TYPE(code));

	/*
	 * Use the saved state area of the thread structure
	 * to store all info required to handle the AST when
	 * returning to userspace. It's possible that there is
	 * already a pending guard exception. If it's non-fatal,
	 * it can only be over-written by a fatal exception code.
	 */
	if (thread->guard_exc_info.code && (thread->guard_exc_fatal || !fatal)) {
		return;
	}

	thread->guard_exc_info.code = code;
	thread->guard_exc_info.subcode = subcode;
	thread->guard_exc_fatal = fatal ? 1 : 0;

	/* Arm the AST at splsched so it is noticed before returning to user mode. */
	spl_t s = splsched();
	thread_ast_set(thread, AST_GUARD);
	ast_propagate(thread);
	splx(s);
}
2465
2466 #if CONFIG_DEBUG_SYSCALL_REJECTION
2467 extern void rejected_syscall_guard_ast(thread_t __unused t, mach_exception_data_type_t code, mach_exception_data_type_t subcode);
2468 #endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
2469
2470 /*
2471 * guard_ast:
2472 *
2473 * Handle AST_GUARD for a thread. This routine looks at the
2474 * state saved in the thread structure to determine the cause
2475 * of this exception. Based on this value, it invokes the
2476 * appropriate routine which determines other exception related
2477 * info and raises the exception.
2478 */
void
guard_ast(thread_t t)
{
	/* Snapshot the pending guard exception info... */
	const mach_exception_data_type_t
	    code = t->guard_exc_info.code,
	    subcode = t->guard_exc_info.subcode;

	/* ...then clear it so a new violation can be recorded. */
	t->guard_exc_info.code = 0;
	t->guard_exc_info.subcode = 0;
	t->guard_exc_fatal = 0;

	/* Dispatch on the guard type encoded in the exception code. */
	switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
	case GUARD_TYPE_NONE:
		/* lingering AST_GUARD on the processor? */
		break;
	case GUARD_TYPE_MACH_PORT:
		mach_port_guard_ast(t, code, subcode);
		break;
	case GUARD_TYPE_FD:
		fd_guard_ast(t, code, subcode);
		break;
#if CONFIG_VNGUARD
	case GUARD_TYPE_VN:
		vn_guard_ast(t, code, subcode);
		break;
#endif
	case GUARD_TYPE_VIRT_MEMORY:
		virt_memory_guard_ast(t, code, subcode);
		break;
#if CONFIG_DEBUG_SYSCALL_REJECTION
	case GUARD_TYPE_REJECTED_SC:
		rejected_syscall_guard_ast(t, code, subcode);
		break;
#endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
	default:
		/* Unknown guard type: unrecoverable state corruption. */
		panic("guard_exc_info %llx %llx", code, subcode);
	}
}
2517
/*
 * Ledger callback for the per-thread cpu_time entry.
 *
 * Called by the ledger code as the thread's CPU usage crosses thresholds:
 *   - warning == LEDGER_WARNING_ROSE_ABOVE: usage is approaching the limit.
 *   - warning == 0: the limit itself has been hit.
 * Other warning values (e.g. dipped-below) only disable telemetry.
 */
static void
thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
		 * on the entire task so there are micro-stackshots available if and when
		 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
		 * for this thread only; but now that this task is suspect, knowing what all of
		 * its threads are up to will be useful.
		 */
		telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
#endif

	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
	}
}
2547
/*
 * Deliver the CPU-usage-monitor violation notification for the current
 * thread: log details, send a RESOURCE_NOTIFY violation (and, for now, a
 * parallel EXC_RESOURCE exception), and, if the monitor is fatal,
 * terminate the task (or invoke jetsam where configured).
 *
 * Invoked from the per-thread cpu_time ledger callback when the limit is
 * exhausted.  noinline so the function name appears in backtraces.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
{
	int pid = 0;
	task_t task = current_task();
	thread_t thread = current_thread();
	uint64_t tid = thread->thread_id;
	const char *procname = "unknown";
	time_value_t thread_total_time = {0, 0};
	time_value_t thread_system_time;
	time_value_t thread_user_time;
	int action;
	uint8_t percentage;
	uint32_t usage_percent = 0;
	uint32_t interval_sec;
	uint64_t interval_ns;
	uint64_t balance_ns;
	boolean_t fatal = FALSE;
	boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
	kern_return_t kr;

#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info lei;

	assert(thread->t_threadledger != LEDGER_NULL);

	/*
	 * Extract the fatal bit and suspend the monitor (which clears the bit).
	 */
	task_lock(task);
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
		fatal = TRUE;
		send_exc_resource = TRUE;
	}
	/* Only one thread can be here at a time. Whichever makes it through
	 * first will successfully suspend the monitor and proceed to send the
	 * notification. Other threads will get an error trying to suspend the
	 * monitor and give up on sending the notification. In the first release,
	 * the monitor won't be resumed for a number of seconds, but we may
	 * eventually need to handle low-latency resume.
	 */
	kr = task_suspend_cpumon(task);
	task_unlock(task);
	if (kr == KERN_INVALID_ARGUMENT) {
		/* Another thread already suspended the monitor; it owns the notification. */
		return;
	}

#ifdef MACH_BSD
	pid = proc_selfpid();
	void *bsd_info = get_bsdtask_info(task);
	if (bsd_info != NULL) {
		procname = proc_name_address(bsd_info);
	}
#endif

	/* Gather the configured limit and the thread's actual usage for logging. */
	thread_get_cpulimit(&action, &percentage, &interval_ns);

	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);

	thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
	time_value_add(&thread_total_time, &thread_user_time);
	time_value_add(&thread_total_time, &thread_system_time);
	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);

	/* credit/debit/balance/limit are in absolute time units;
	 * the refill info is in nanoseconds. */
	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	if (lei.lei_last_refill > 0) {
		usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
	}

	/* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
	printf("process %s[%d] thread %llu caught burning CPU! It used more than %d%% CPU over %u seconds\n",
	    procname, pid, tid, percentage, interval_sec);
	printf("  (actual recent usage: %d%% over ~%llu seconds)\n",
	    usage_percent, (lei.lei_last_refill + NSEC_PER_SEC / 2) / NSEC_PER_SEC);
	printf("  Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys)\n",
	    thread_total_time.seconds, thread_total_time.microseconds,
	    thread_user_time.seconds, thread_user_time.microseconds,
	    thread_system_time.seconds, thread_system_time.microseconds);
	printf("  Ledger balance: %lld; mabs credit: %lld; mabs debit: %lld\n",
	    lei.lei_balance, lei.lei_credit, lei.lei_debit);
	printf("  mabs limit: %llu; mabs period: %llu ns; last refill: %llu ns%s.\n",
	    lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
	    (fatal ? " [fatal violation]" : ""));

	/*
	 * For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
	 * we have logging parity, we will stop sending EXC_RESOURCE (24508922).
	 */

	/* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
	lei.lei_balance = balance_ns;
	absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
	trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
	kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
	    fatal ? kRNFatalLimitFlag : 0);
	if (kr) {
		printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	/* EXC_RESOURCE can be suppressed by boot-arg or active audio playback. */
	if (send_exc_resource) {
		if (disable_exc_resource) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			    "EXC_RESOURCE%s supressed by a boot-arg\n",
			    procname, pid, tid, fatal ? " (and termination)" : "");
			return;
		}

		if (audio_active) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			    "EXC_RESOURCE & termination supressed due to audio playback\n",
			    procname, pid, tid);
			return;
		}
	}


	if (send_exc_resource) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
		if (fatal) {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
		} else {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
		}
		EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
		exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	}
#endif /* EXC_RESOURCE_MONITORS */

	if (fatal) {
#if CONFIG_JETSAM
		jetsam_on_ledger_cpulimit_exceeded();
#else
		task_terminate_internal(task);
#endif
	}
}
2692
2693 bool os_variant_has_internal_diagnostics(const char *subsystem);
2694
2695 #if DEVELOPMENT || DEBUG
/*
 * Deliver an EXC_RESOURCE (via corpse fork) when a task crosses the
 * thread-count high watermark.  Several conditions suppress delivery:
 * launchd (pid 1), the disable_exc_resource boot-arg, non-internal
 * builds, active audio playback, and disabled corpse forking.
 * noinline so the function name appears in backtraces.
 */
void __attribute__((noinline))
SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
{
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
	int pid = task_pid(task);
	char procname[MAXCOMLEN + 1] = "unknown";

	if (pid == 1) {
		/*
		 * Cannot suspend launchd
		 */
		return;
	}

	proc_name(pid, procname, sizeof(procname));

	if (disable_exc_resource) {
		printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
		    "supressed by a boot-arg.\n", procname, pid, thread_count);
		return;
	}

	if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
		printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
		    "supressed, internal diagnostics disabled.\n", procname, pid, thread_count);
		return;
	}

	if (audio_active) {
		printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
		    "supressed due to audio playback.\n", procname, pid, thread_count);
		return;
	}

	if (!exc_via_corpse_forking) {
		printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
		    "supressed due to corpse forking being disabled.\n", procname, pid,
		    thread_count);
		return;
	}

	printf("process %s[%d] crossed thread count high watermark (%d), sending "
	    "EXC_RESOURCE\n", procname, pid, thread_count);

	/* Encode resource type, flavor, and the offending thread count. */
	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
	EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);

	task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL, FALSE);
}
2746 #endif /* DEVELOPMENT || DEBUG */
2747
2748 void
thread_update_io_stats(thread_t thread,int size,int io_flags)2749 thread_update_io_stats(thread_t thread, int size, int io_flags)
2750 {
2751 task_t task = get_threadtask(thread);
2752 int io_tier;
2753
2754 if (thread->thread_io_stats == NULL || task->task_io_stats == NULL) {
2755 return;
2756 }
2757
2758 if (io_flags & DKIO_READ) {
2759 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2760 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->disk_reads, size);
2761 }
2762
2763 if (io_flags & DKIO_META) {
2764 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2765 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->metadata, size);
2766 }
2767
2768 if (io_flags & DKIO_PAGING) {
2769 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2770 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->paging, size);
2771 }
2772
2773 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
2774 assert(io_tier < IO_NUM_PRIORITIES);
2775
2776 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2777 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->io_priority[io_tier], size);
2778
2779 /* Update Total I/O Counts */
2780 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2781 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->total_io, size);
2782
2783 if (!(io_flags & DKIO_READ)) {
2784 DTRACE_IO3(physical_writes, struct task *, task, uint32_t, size, int, io_flags);
2785 ledger_credit(task->ledger, task_ledgers.physical_writes, size);
2786 }
2787 }
2788
2789 static void
init_thread_ledgers(void)2790 init_thread_ledgers(void)
2791 {
2792 ledger_template_t t;
2793 int idx;
2794
2795 assert(thread_ledger_template == NULL);
2796
2797 if ((t = ledger_template_create("Per-thread ledger")) == NULL) {
2798 panic("couldn't create thread ledger template");
2799 }
2800
2801 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2802 panic("couldn't create cpu_time entry for thread ledger template");
2803 }
2804
2805 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2806 panic("couldn't set thread ledger callback for cpu_time entry");
2807 }
2808
2809 thread_ledgers.cpu_time = idx;
2810
2811 ledger_template_complete(t);
2812 thread_ledger_template = t;
2813 }
2814
2815 /*
2816 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2817 */
2818 int
thread_get_cpulimit(int * action,uint8_t * percentage,uint64_t * interval_ns)2819 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2820 {
2821 int64_t abstime = 0;
2822 uint64_t limittime = 0;
2823 thread_t thread = current_thread();
2824
2825 *percentage = 0;
2826 *interval_ns = 0;
2827 *action = 0;
2828
2829 if (thread->t_threadledger == LEDGER_NULL) {
2830 /*
2831 * This thread has no per-thread ledger, so it can't possibly
2832 * have a CPU limit applied.
2833 */
2834 return KERN_SUCCESS;
2835 }
2836
2837 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2838 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2839
2840 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2841 /*
2842 * This thread's CPU time ledger has no period or limit; so it
2843 * doesn't have a CPU limit applied.
2844 */
2845 return KERN_SUCCESS;
2846 }
2847
2848 /*
2849 * This calculation is the converse to the one in thread_set_cpulimit().
2850 */
2851 absolutetime_to_nanoseconds(abstime, &limittime);
2852 *percentage = (uint8_t)((limittime * 100ULL) / *interval_ns);
2853 assert(*percentage <= 100);
2854
2855 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2856 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2857
2858 *action = THREAD_CPULIMIT_BLOCK;
2859 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2860 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2861
2862 *action = THREAD_CPULIMIT_EXCEPTION;
2863 } else {
2864 *action = THREAD_CPULIMIT_DISABLE;
2865 }
2866
2867 return KERN_SUCCESS;
2868 }
2869
2870 /*
2871 * Set CPU usage limit on a thread.
2872 *
2873 * Calling with percentage of 0 will unset the limit for this thread.
2874 */
/*
 * thread_set_cpulimit:
 *
 *	action:		THREAD_CPULIMIT_DISABLE removes any limit;
 *			THREAD_CPULIMIT_EXCEPTION raises EXC_RESOURCE on
 *			violation (task-wide CPU usage monitor);
 *			THREAD_CPULIMIT_BLOCK blocks the thread until refill.
 *	percentage:	CPU share per interval, 0..100.
 *	interval_ns:	refill period; must be at least
 *			MINIMUM_CPULIMIT_INTERVAL_MS.
 *
 * Operates on the calling thread only.  Returns 0 on success or a
 * kern_return_t error code.
 */
int
thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
{
	thread_t thread = current_thread();
	ledger_t l;
	uint64_t limittime = 0;
	uint64_t abstime = 0;

	assert(percentage <= 100);

	if (action == THREAD_CPULIMIT_DISABLE) {
		/*
		 * Remove CPU limit, if any exists.
		 */
		if (thread->t_threadledger != LEDGER_NULL) {
			l = thread->t_threadledger;
			ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
			ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
			thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
		}

		return 0;
	}

	/* Reject intervals too short to meter meaningfully. */
	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
		return KERN_INVALID_ARGUMENT;
	}

	l = thread->t_threadledger;
	if (l == LEDGER_NULL) {
		/*
		 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
		 */
		if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}

		/*
		 * We are the first to create this thread's ledger, so only activate our entry.
		 */
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
		thread->t_threadledger = l;
	}

	/*
	 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
	 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	 */
	limittime = (interval_ns * percentage) / 100;
	nanoseconds_to_absolutetime(limittime, &abstime);
	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	/*
	 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
	 */
	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);

	if (action == THREAD_CPULIMIT_EXCEPTION) {
		/*
		 * We don't support programming the CPU usage monitor on a task if any of its
		 * threads have a per-thread blocking CPU limit configured.
		 */
		if (thread->options & TH_OPT_PRVT_CPULIMIT) {
			panic("CPU usage monitor activated, but blocking thread limit exists");
		}

		/*
		 * Make a note that this thread's CPU limit is being used for the task-wide CPU
		 * usage monitor. We don't have to arm the callback which will trigger the
		 * exception, because that was done for us in ledger_instantiate (because the
		 * ledger template used has a default callback).
		 */
		thread->options |= TH_OPT_PROC_CPULIMIT;
	} else {
		/*
		 * We deliberately override any CPU limit imposed by a task-wide limit (eg
		 * CPU usage monitor).
		 */
		thread->options &= ~TH_OPT_PROC_CPULIMIT;

		thread->options |= TH_OPT_PRVT_CPULIMIT;
		/* The per-thread ledger template by default has a callback for CPU time */
		ledger_disable_callback(l, thread_ledgers.cpu_time);
		ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	}

	return 0;
}
2962
/*
 * Install (or clear, by passing NULL) the scheduler state-transition
 * callback for a thread.
 *
 * A thread in a TH_WAIT_REPORT wait may not have its sched_call changed,
 * hence the assert.
 */
void
thread_sched_call(
	thread_t        thread,
	sched_call_t    call)
{
	assert((thread->state & TH_WAIT_REPORT) == 0);
	thread->sched_call = call;
}
2971
2972 uint64_t
thread_tid(thread_t thread)2973 thread_tid(
2974 thread_t thread)
2975 {
2976 return thread != THREAD_NULL? thread->thread_id: 0;
2977 }
2978
2979 uint64_t
uthread_tid(struct uthread * uth)2980 uthread_tid(
2981 struct uthread *uth)
2982 {
2983 if (uth) {
2984 return thread_tid(get_machthread(uth));
2985 }
2986 return 0;
2987 }
2988
2989 uint16_t
thread_set_tag(thread_t th,uint16_t tag)2990 thread_set_tag(thread_t th, uint16_t tag)
2991 {
2992 return thread_set_tag_internal(th, tag);
2993 }
2994
2995 uint16_t
thread_get_tag(thread_t th)2996 thread_get_tag(thread_t th)
2997 {
2998 return thread_get_tag_internal(th);
2999 }
3000
/* Return the timestamp of when this thread last ran (thread->last_run_time). */
uint64_t
thread_last_run_time(thread_t th)
{
	return th->last_run_time;
}
3006
3007 /*
3008 * Shared resource contention management
3009 *
3010 * The scheduler attempts to load balance the shared resource intensive
3011 * workloads across clusters to ensure that the resource is not heavily
3012 * contended. The kernel relies on external agents (userspace or
3013 * performance controller) to identify shared resource heavy threads.
3014 * The load balancing is achieved based on the scheduler configuration
3015 * enabled on the platform.
3016 */
3017
3018
3019 #if CONFIG_SCHED_EDGE
3020
3021 /*
3022 * On the Edge scheduler, the load balancing is achieved by looking
3023 * at cluster level shared resource loads and migrating resource heavy
3024 * threads dynamically to under utilized cluster. Therefore, when a
3025 * thread is indicated as a resource heavy thread, the policy set
3026 * routine simply adds a flag to the thread which is looked at by
3027 * the scheduler on thread migration decisions.
3028 */
3029
3030 boolean_t
thread_shared_rsrc_policy_get(thread_t thread,cluster_shared_rsrc_type_t type)3031 thread_shared_rsrc_policy_get(thread_t thread, cluster_shared_rsrc_type_t type)
3032 {
3033 return thread->th_shared_rsrc_heavy_user[type] || thread->th_shared_rsrc_heavy_perf_control[type];
3034 }
3035
/*
 * Values passed as the first argument of the
 * MACH_SCHED_EDGE_RSRC_HEAVY_THREAD KDBG tracepoint below, indicating
 * whether the heavy-thread flag is being set or cleared.
 */
__options_decl(sched_edge_rsrc_heavy_thread_state, uint32_t, {
	SCHED_EDGE_RSRC_HEAVY_THREAD_SET = 1,
	SCHED_EDGE_RSRC_HEAVY_THREAD_CLR = 2,
});
3040
/*
 * Mark a thread as a heavy user of a shared cluster resource on behalf of
 * the given agent. Returns KERN_FAILURE if the flag was already set for
 * this agent class, KERN_SUCCESS otherwise. On the Edge scheduler the
 * flag is consulted on migration decisions, so `index` is unused here.
 */
kern_return_t
thread_shared_rsrc_policy_set(thread_t thread, __unused uint32_t index, cluster_shared_rsrc_type_t type, shared_rsrc_policy_agent_t agent)
{
	spl_t s = splsched();
	thread_lock(thread);

	/* Userspace agents (dispatch/sysctl) and perf-control use separate flag arrays */
	bool user = (agent == SHARED_RSRC_POLICY_AGENT_DISPATCH) || (agent == SHARED_RSRC_POLICY_AGENT_SYSCTL);
	bool *thread_flags = (user) ? thread->th_shared_rsrc_heavy_user : thread->th_shared_rsrc_heavy_perf_control;
	if (thread_flags[type]) {
		/* Already marked heavy by this agent class */
		thread_unlock(thread);
		splx(s);
		return KERN_FAILURE;
	}

	thread_flags[type] = true;
	thread_unlock(thread);
	splx(s);

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_EDGE_RSRC_HEAVY_THREAD) | DBG_FUNC_NONE, SCHED_EDGE_RSRC_HEAVY_THREAD_SET, thread_tid(thread), type, agent);
	/*
	 * If the policy of the current thread changed, force its placement to
	 * be re-evaluated: the quantum-based perfctl agent only requests a
	 * preemption AST; other agents context-switch immediately.
	 */
	if (thread == current_thread()) {
		if (agent == SHARED_RSRC_POLICY_AGENT_PERFCTL_QUANTUM) {
			ast_on(AST_PREEMPT);
		} else {
			assert(agent != SHARED_RSRC_POLICY_AGENT_PERFCTL_CSW);
			thread_block(THREAD_CONTINUE_NULL);
		}
	}
	return KERN_SUCCESS;
}
3070
/*
 * Clear the shared-resource-heavy flag previously set by
 * thread_shared_rsrc_policy_set() for the given agent class. Returns
 * KERN_FAILURE if the flag was not set, KERN_SUCCESS otherwise.
 */
kern_return_t
thread_shared_rsrc_policy_clear(thread_t thread, cluster_shared_rsrc_type_t type, shared_rsrc_policy_agent_t agent)
{
	spl_t s = splsched();
	thread_lock(thread);

	/* Userspace agents (dispatch/sysctl) and perf-control use separate flag arrays */
	bool user = (agent == SHARED_RSRC_POLICY_AGENT_DISPATCH) || (agent == SHARED_RSRC_POLICY_AGENT_SYSCTL);
	bool *thread_flags = (user) ? thread->th_shared_rsrc_heavy_user : thread->th_shared_rsrc_heavy_perf_control;
	if (!thread_flags[type]) {
		/* Not currently marked heavy by this agent class */
		thread_unlock(thread);
		splx(s);
		return KERN_FAILURE;
	}

	thread_flags[type] = false;
	thread_unlock(thread);
	splx(s);

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_EDGE_RSRC_HEAVY_THREAD) | DBG_FUNC_NONE, SCHED_EDGE_RSRC_HEAVY_THREAD_CLR, thread_tid(thread), type, agent);
	/*
	 * If the policy of the current thread changed, force its placement to
	 * be re-evaluated (see thread_shared_rsrc_policy_set).
	 */
	if (thread == current_thread()) {
		if (agent == SHARED_RSRC_POLICY_AGENT_PERFCTL_QUANTUM) {
			ast_on(AST_PREEMPT);
		} else {
			assert(agent != SHARED_RSRC_POLICY_AGENT_PERFCTL_CSW);
			thread_block(THREAD_CONTINUE_NULL);
		}
	}
	return KERN_SUCCESS;
}
3100
3101 #else /* CONFIG_SCHED_EDGE */
3102
3103 /*
3104 * On non-Edge schedulers, the shared resource contention
3105 * is managed by simply binding threads to specific clusters
3106 * based on the worker index passed by the agents marking
3107 * this thread as resource heavy threads. The thread binding
3108 * approach does not provide any rebalancing opportunities;
3109 * it can also suffer from scheduling delays if the cluster
3110 * where the thread is bound is contended.
3111 */
3112
/*
 * Non-Edge schedulers track no per-thread shared-resource-heavy state;
 * the policy is implemented purely via cluster binding, so this always
 * reports false.
 */
boolean_t
thread_shared_rsrc_policy_get(__unused thread_t thread, __unused cluster_shared_rsrc_type_t type)
{
	return false;
}
3118
/*
 * Non-Edge implementation: softly bind the thread to the cluster
 * identified by the agent-supplied worker `index` (only if the thread is
 * eligible to run there). Returns the result of the bind attempt.
 */
kern_return_t
thread_shared_rsrc_policy_set(thread_t thread, uint32_t index, __unused cluster_shared_rsrc_type_t type, __unused shared_rsrc_policy_agent_t agent)
{
	return thread_bind_cluster_id(thread, index, THREAD_BIND_SOFT | THREAD_BIND_ELIGIBLE_ONLY);
}
3124
/*
 * Non-Edge implementation: undo the soft cluster binding established by
 * thread_shared_rsrc_policy_set().
 */
kern_return_t
thread_shared_rsrc_policy_clear(thread_t thread, __unused cluster_shared_rsrc_type_t type, __unused shared_rsrc_policy_agent_t agent)
{
	return thread_bind_cluster_id(thread, 0, THREAD_UNBIND);
}
3130
3131 #endif /* CONFIG_SCHED_EDGE */
3132
/*
 * Return the user-space address of this thread's libdispatch queue
 * pointer: the thread's TSD base (machine.cthread_self) plus the task's
 * dispatch-queue offset. Returns 0 if the thread is NULL, has no TSD
 * base, or no offset source is available.
 */
uint64_t
thread_dispatchqaddr(
	thread_t                thread)
{
	uint64_t        dispatchqueue_addr;
	uint64_t        thread_handle;
	task_t          task;

	if (thread == THREAD_NULL) {
		return 0;
	}

	/* Base address of the thread's user TSD area */
	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0) {
		return 0;
	}

	task = get_threadtask(thread);
	void *bsd_info = get_bsdtask_info(task);
	if (thread->inspection == TRUE) {
		/*
		 * Thread under inspection: use the offset cached on the task
		 * rather than consulting the (possibly absent) live proc.
		 */
		dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(task);
	} else if (bsd_info) {
		/* Live thread with a BSD proc: ask the proc for the offset */
		dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(bsd_info);
	} else {
		dispatchqueue_addr = 0;
	}

	return dispatchqueue_addr;
}
3162
3163
/*
 * Return the user-space address at which the workqueue quantum expiry is
 * reported for this thread: the thread's TSD base plus the task's
 * wq-quantum offset. Returns 0 if the thread is NULL, has no TSD base,
 * or the task has not published an offset (userspace opted out / not set
 * up yet).
 */
uint64_t
thread_wqquantum_addr(thread_t thread)
{
	uint64_t thread_handle;
	task_t task;

	if (thread == THREAD_NULL) {
		return 0;
	}

	/* Base address of the thread's user TSD area */
	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0) {
		return 0;
	}
	task = get_threadtask(thread);

	uint64_t wq_quantum_expiry_offset = get_wq_quantum_offset_from_proc(get_bsdtask_info(task));
	if (wq_quantum_expiry_offset == 0) {
		return 0;
	}

	return wq_quantum_expiry_offset + thread_handle;
}
3187
3188 uint64_t
thread_rettokern_addr(thread_t thread)3189 thread_rettokern_addr(
3190 thread_t thread)
3191 {
3192 uint64_t rettokern_addr;
3193 uint64_t rettokern_offset;
3194 uint64_t thread_handle;
3195 task_t task;
3196 void *bsd_info;
3197
3198 if (thread == THREAD_NULL) {
3199 return 0;
3200 }
3201
3202 thread_handle = thread->machine.cthread_self;
3203 if (thread_handle == 0) {
3204 return 0;
3205 }
3206 task = get_threadtask(thread);
3207 bsd_info = get_bsdtask_info(task);
3208
3209 if (bsd_info) {
3210 rettokern_offset = get_return_to_kernel_offset_from_proc(bsd_info);
3211
3212 /* Return 0 if return to kernel offset is not initialized. */
3213 if (rettokern_offset == 0) {
3214 rettokern_addr = 0;
3215 } else {
3216 rettokern_addr = thread_handle + rettokern_offset;
3217 }
3218 } else {
3219 rettokern_addr = 0;
3220 }
3221
3222 return rettokern_addr;
3223 }
3224
3225 /*
3226 * Export routines to other components for things that are done as macros
3227 * within the osfmk component.
3228 */
3229
/* Acquire the thread's mutex (exported wrapper around lck_mtx_lock). */
void
thread_mtx_lock(thread_t thread)
{
	lck_mtx_lock(&thread->mutex);
}
3235
/* Release the thread's mutex (exported wrapper around lck_mtx_unlock). */
void
thread_mtx_unlock(thread_t thread)
{
	lck_mtx_unlock(&thread->mutex);
}
3241
/*
 * Take an additional reference on a thread. THREAD_NULL is a no-op.
 * zone_id_require() validates that the pointer really is a thread
 * allocated from the thread zone before the refcount is touched.
 */
void
thread_reference(
	thread_t        thread)
{
	if (thread != THREAD_NULL) {
		zone_id_require(ZONE_ID_THREAD, sizeof(struct thread), thread);
		os_ref_retain_raw(&thread->ref_count, &thread_refgrp);
	}
}
3251
/*
 * Assert (without taking a reference) that `thread` is a valid pointer
 * into the thread zone; panics otherwise.
 */
void
thread_require(thread_t thread)
{
	zone_id_require(ZONE_ID_THREAD, sizeof(struct thread), thread);
}
3257
/*
 * thread_should_halt is presumably also provided as a macro in a header
 * (hence the #undef); export a real out-of-line function with the same
 * name for other components, backed by the fast inline check.
 */
#undef thread_should_halt

boolean_t
thread_should_halt(
	thread_t                th)
{
	return thread_should_halt_fast(th);
}
3266
3267 /*
3268 * thread_set_voucher_name - reset the voucher port name bound to this thread
3269 *
3270 * Conditions: nothing locked
3271 */
3272
kern_return_t
thread_set_voucher_name(mach_port_name_t voucher_name)
{
	thread_t thread = current_thread();
	ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
	ipc_voucher_t voucher;
	ledger_t bankledger = NULL;
	struct thread_group *banktg = NULL;
	uint32_t persona_id = 0;

	if (MACH_PORT_DEAD == voucher_name) {
		return KERN_INVALID_RIGHT;
	}

	/*
	 * aggressively convert to voucher reference
	 * (MACH_PORT_NULL leaves new_voucher as IPC_VOUCHER_NULL, i.e. unset)
	 */
	if (MACH_PORT_VALID(voucher_name)) {
		new_voucher = convert_port_name_to_voucher(voucher_name);
		if (IPC_VOUCHER_NULL == new_voucher) {
			return KERN_INVALID_ARGUMENT;
		}
	}
	/* Extract bank ledger / thread group / persona carried by the new voucher */
	bank_get_bank_ledger_thread_group_and_persona(new_voucher, &bankledger, &banktg, &persona_id);

	/* Swap the voucher under the thread mutex; old reference released below */
	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;
	thread->ith_voucher_name = voucher_name;
	thread->ith_voucher = new_voucher;
	thread_mtx_unlock(thread);

	bank_swap_thread_bank_ledger(thread, bankledger);
#if CONFIG_THREAD_GROUPS
	thread_group_set_bank(thread, banktg);
#endif /* CONFIG_THREAD_GROUPS */

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
	    (uintptr_t)thread_tid(thread),
	    (uintptr_t)voucher_name,
	    VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
	    persona_id, 0);

	/* Drop the reference previously held by the thread, outside the mutex */
	if (IPC_VOUCHER_NULL != voucher) {
		ipc_voucher_release(voucher);
	}

	return KERN_SUCCESS;
}
3322
3323 /*
3324 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
3325 *
3326 * Conditions: nothing locked
3327 *
3328 * NOTE: At the moment, there is no distinction between the current and effective
3329 * vouchers because we only set them at the thread level currently.
3330 */
3331 kern_return_t
thread_get_mach_voucher(thread_act_t thread,mach_voucher_selector_t __unused which,ipc_voucher_t * voucherp)3332 thread_get_mach_voucher(
3333 thread_act_t thread,
3334 mach_voucher_selector_t __unused which,
3335 ipc_voucher_t *voucherp)
3336 {
3337 ipc_voucher_t voucher;
3338
3339 if (THREAD_NULL == thread) {
3340 return KERN_INVALID_ARGUMENT;
3341 }
3342
3343 thread_mtx_lock(thread);
3344 voucher = thread->ith_voucher;
3345
3346 if (IPC_VOUCHER_NULL != voucher) {
3347 ipc_voucher_reference(voucher);
3348 thread_mtx_unlock(thread);
3349 *voucherp = voucher;
3350 return KERN_SUCCESS;
3351 }
3352
3353 thread_mtx_unlock(thread);
3354
3355 *voucherp = IPC_VOUCHER_NULL;
3356 return KERN_SUCCESS;
3357 }
3358
3359 /*
3360 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
3361 *
3362 * Conditions: callers holds a reference on the voucher.
3363 * nothing locked.
3364 *
3365 * We grab another reference to the voucher and bind it to the thread.
3366 * The old voucher reference associated with the thread is
3367 * discarded.
3368 */
kern_return_t
thread_set_mach_voucher(
	thread_t                thread,
	ipc_voucher_t           voucher)
{
	ipc_voucher_t old_voucher;
	ledger_t bankledger = NULL;
	struct thread_group *banktg = NULL;
	uint32_t persona_id = 0;

	if (THREAD_NULL == thread) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Extract bank ledger / thread group / persona carried by the voucher */
	bank_get_bank_ledger_thread_group_and_persona(voucher, &bankledger, &banktg, &persona_id);

	thread_mtx_lock(thread);
	/*
	 * Once the thread is started, we will look at `ith_voucher` without
	 * holding any lock.
	 *
	 * Setting the voucher hence can only be done by current_thread() or
	 * before it started. "started" flips under the thread mutex and must be
	 * tested under it too.
	 */
	if (thread != current_thread() && thread->started) {
		thread_mtx_unlock(thread);
		return KERN_INVALID_ARGUMENT;
	}

	/* Take our own reference on the new voucher; drop the old one below */
	ipc_voucher_reference(voucher);
	old_voucher = thread->ith_voucher;
	thread->ith_voucher = voucher;
	thread->ith_voucher_name = MACH_PORT_NULL;
	thread_mtx_unlock(thread);

	bank_swap_thread_bank_ledger(thread, bankledger);
#if CONFIG_THREAD_GROUPS
	thread_group_set_bank(thread, banktg);
#endif /* CONFIG_THREAD_GROUPS */

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
	    (uintptr_t)thread_tid(thread),
	    (uintptr_t)MACH_PORT_NULL,
	    VM_KERNEL_ADDRPERM((uintptr_t)voucher),
	    persona_id, 0);

	/* Release the thread's previous voucher reference, outside the mutex */
	ipc_voucher_release(old_voucher);

	return KERN_SUCCESS;
}
3421
3422 /*
3423 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
3424 *
3425 * Conditions: callers holds a reference on the new and presumed old voucher(s).
3426 * nothing locked.
3427 *
3428 * This function is no longer supported.
3429 */
kern_return_t
thread_swap_mach_voucher(
	__unused thread_t               thread,
	__unused ipc_voucher_t          new_voucher,
	ipc_voucher_t                   *in_out_old_voucher)
{
	/*
	 * Currently this function is only called from a MIG generated
	 * routine which doesn't release the reference on the voucher
	 * addressed by in_out_old_voucher. To avoid leaking this reference,
	 * a call to release it has been added here.
	 */
	ipc_voucher_release(*in_out_old_voucher);
	/* Unconditionally unsupported; the suppression silences a known analyzer report. */
	OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
}
3445
3446 /*
3447 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
3448 */
3449 kern_return_t
thread_get_current_voucher_origin_pid(int32_t * pid)3450 thread_get_current_voucher_origin_pid(
3451 int32_t *pid)
3452 {
3453 uint32_t buf_size;
3454 kern_return_t kr;
3455 thread_t thread = current_thread();
3456
3457 buf_size = sizeof(*pid);
3458 kr = mach_voucher_attr_command(thread->ith_voucher,
3459 MACH_VOUCHER_ATTR_KEY_BANK,
3460 BANK_ORIGINATOR_PID,
3461 NULL,
3462 0,
3463 (mach_voucher_attr_content_t)pid,
3464 &buf_size);
3465
3466 return kr;
3467 }
3468
3469 #if CONFIG_THREAD_GROUPS
3470 /*
3471 * Returns the current thread's voucher-carried thread group
3472 *
3473 * Reference is borrowed from this being the current voucher, so it does NOT
3474 * return a reference to the group.
3475 */
struct thread_group *
thread_get_current_voucher_thread_group(thread_t thread)
{
	/* Only valid on self: the voucher is read without a reference or lock */
	assert(thread == current_thread());

	if (thread->ith_voucher == NULL) {
		return NULL;
	}

	ledger_t bankledger = NULL;
	struct thread_group *banktg = NULL;

	/* persona_id out-param is NULL: we only need the thread group here */
	bank_get_bank_ledger_thread_group_and_persona(thread->ith_voucher, &bankledger, &banktg, NULL);

	return banktg;
}
3492
3493 #endif /* CONFIG_THREAD_GROUPS */
3494
3495 extern struct workqueue *
3496 proc_get_wqptr(void *proc);
3497
3498 static bool
task_supports_cooperative_workqueue(task_t task)3499 task_supports_cooperative_workqueue(task_t task)
3500 {
3501 void *bsd_info = get_bsdtask_info(task);
3502
3503 assert(task == current_task());
3504 if (bsd_info == NULL) {
3505 return false;
3506 }
3507
3508 uint64_t wq_quantum_expiry_offset = get_wq_quantum_offset_from_proc(bsd_info);
3509 /* userspace may not yet have called workq_open yet */
3510 struct workqueue *wq = proc_get_wqptr(bsd_info);
3511
3512 return (wq != NULL) && (wq_quantum_expiry_offset != 0);
3513 }
3514
3515 /* Not safe to call from scheduler paths - should only be called on self */
3516 bool
thread_supports_cooperative_workqueue(thread_t thread)3517 thread_supports_cooperative_workqueue(thread_t thread)
3518 {
3519 struct uthread *uth = get_bsdthread_info(thread);
3520 task_t task = get_threadtask(thread);
3521
3522 assert(thread == current_thread());
3523
3524 return task_supports_cooperative_workqueue(task) &&
3525 bsdthread_part_of_cooperative_workqueue(uth);
3526 }
3527
/* A non-zero workq_quantum_deadline means a workqueue quantum is armed. */
static inline bool
thread_has_armed_workqueue_quantum(thread_t thread)
{
	return thread->workq_quantum_deadline != 0;
}
3533
3534 /*
3535 * The workq quantum is a lazy timer that is evaluated at 2 specific times in
3536 * the scheduler:
3537 *
3538 * - context switch time
3539 * - scheduler quantum expiry time.
3540 *
3541 * We're currently expressing the workq quantum with a 0.5 scale factor of the
3542 * scheduler quantum. It is possible that if the workq quantum is rearmed
3543 * shortly after the scheduler quantum begins, we could have a large delay
3544 * between when the workq quantum next expires and when it actually is noticed.
3545 *
3546 * A potential future improvement for the wq quantum expiry logic is to compare
3547 * it to the next actual scheduler quantum deadline and expire it if it is
3548 * within a certain leeway.
3549 */
/* Workq quantum length: half of the scheduler's initial quantum for this thread. */
static inline uint64_t
thread_workq_quantum_size(thread_t thread)
{
	return (uint64_t) (SCHED(initial_quantum_size)(thread) / 2);
}
3555
3556 /*
3557 * Always called by thread on itself - either at AST boundary after processing
3558 * an existing quantum expiry, or when a new quantum is armed before the thread
3559 * goes out to userspace to handle a thread request
3560 */
void
thread_arm_workqueue_quantum(thread_t thread)
{
	/*
	 * If the task is not opted into wq quantum notification, or if the thread
	 * is not part of the cooperative workqueue, don't even bother with tracking
	 * the quantum or calculating expiry
	 */
	if (!thread_supports_cooperative_workqueue(thread)) {
		assert(thread->workq_quantum_deadline == 0);
		return;
	}

	assert(current_thread() == thread);
	assert(thread_get_tag(thread) & THREAD_TAG_WORKQUEUE);

	/* Deadline is expressed in the thread's accumulated runtime, not wall time */
	uint64_t current_runtime = thread_get_runtime_self();
	uint64_t deadline = thread_workq_quantum_size(thread) + current_runtime;

	/*
	 * The update of a workqueue quantum should always be followed by the update
	 * of the AST - see explanation in kern/thread.h for synchronization of this
	 * field
	 */
	thread->workq_quantum_deadline = deadline;

	/* We're arming a new quantum, clear any previous expiry notification */
	act_clear_astkevent(thread, AST_KEVENT_WORKQ_QUANTUM_EXPIRED);

	WQ_TRACE(TRACE_wq_quantum_arm, current_runtime, deadline, 0, 0);

	WORKQ_QUANTUM_HISTORY_WRITE_ENTRY(thread, thread->workq_quantum_deadline, true);
}
3594
3595 /* Called by a thread on itself when it is about to park */
/* Called by a thread on itself when it is about to park */
void
thread_disarm_workqueue_quantum(thread_t thread)
{
	/* The update of a workqueue quantum should always be followed by the update
	 * of the AST - see explanation in kern/thread.h for synchronization of this
	 * field */
	thread->workq_quantum_deadline = 0;
	/* Drop any pending, now-stale expiry notification along with the quantum */
	act_clear_astkevent(thread, AST_KEVENT_WORKQ_QUANTUM_EXPIRED);

	WQ_TRACE(TRACE_wq_quantum_disarm, 0, 0, 0, 0);

	WORKQ_QUANTUM_HISTORY_WRITE_ENTRY(thread, thread->workq_quantum_deadline, false);
}
3609
3610 /* This is called at context switch time on a thread that may not be self,
3611 * and at AST time
3612 */
/*
 * Returns true when the thread has an armed workqueue quantum whose
 * runtime deadline has passed. `should_trace` gates the expiry tracepoint.
 */
bool
thread_has_expired_workqueue_quantum(thread_t thread, bool should_trace)
{
	if (!thread_has_armed_workqueue_quantum(thread)) {
		return false;
	}
	/* We do not do a thread_get_runtime_self() here since this function is
	 * called from context switch time or during scheduler quantum expiry and
	 * therefore, we may not be evaluating it on the current thread/self.
	 *
	 * In addition, the timers on the thread have just been updated recently so
	 * we don't need to update them again.
	 */
	uint64_t runtime = recount_thread_time_mach(thread);
	bool expired = runtime > thread->workq_quantum_deadline;

	if (expired && should_trace) {
		WQ_TRACE(TRACE_wq_quantum_expired, runtime, thread->workq_quantum_deadline, 0, 0);
	}

	return expired;
}
3635
3636 /*
3637 * Called on a thread that is being context switched out or during quantum
3638 * expiry on self. Only called from scheduler paths.
3639 */
3640 void
thread_evaluate_workqueue_quantum_expiry(thread_t thread)3641 thread_evaluate_workqueue_quantum_expiry(thread_t thread)
3642 {
3643 if (thread_has_expired_workqueue_quantum(thread, true)) {
3644 act_set_astkevent(thread, AST_KEVENT_WORKQ_QUANTUM_EXPIRED);
3645 }
3646 }
3647
3648 boolean_t
thread_has_thread_name(thread_t th)3649 thread_has_thread_name(thread_t th)
3650 {
3651 if (th) {
3652 return bsd_hasthreadname(get_bsdthread_info(th));
3653 }
3654
3655 /*
3656 * This is an odd case; clients may set the thread name based on the lack of
3657 * a name, but in this context there is no uthread to attach the name to.
3658 */
3659 return FALSE;
3660 }
3661
3662 void
thread_set_thread_name(thread_t th,const char * name)3663 thread_set_thread_name(thread_t th, const char* name)
3664 {
3665 if (th && name) {
3666 bsd_setthreadname(get_bsdthread_info(th), name);
3667 }
3668 }
3669
3670 void
thread_get_thread_name(thread_t th,char * name)3671 thread_get_thread_name(thread_t th, char* name)
3672 {
3673 if (!name) {
3674 return;
3675 }
3676 if (th) {
3677 bsd_getthreadname(get_bsdthread_info(th), name);
3678 } else {
3679 name[0] = '\0';
3680 }
3681 }
3682
/* Set the TH_OPT_HONOR_QLIMIT option bit on the thread. */
void
thread_set_honor_qlimit(thread_t thread)
{
	thread->options |= TH_OPT_HONOR_QLIMIT;
}
3688
/* Clear the TH_OPT_HONOR_QLIMIT option bit on the thread. */
void
thread_clear_honor_qlimit(thread_t thread)
{
	thread->options &= (~TH_OPT_HONOR_QLIMIT);
}
3694
3695 /*
3696 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
3697 */
3698 void
thread_enable_send_importance(thread_t thread,boolean_t enable)3699 thread_enable_send_importance(thread_t thread, boolean_t enable)
3700 {
3701 if (enable == TRUE) {
3702 thread->options |= TH_OPT_SEND_IMPORTANCE;
3703 } else {
3704 thread->options &= ~TH_OPT_SEND_IMPORTANCE;
3705 }
3706 }
3707
3708 kern_return_t
thread_get_ipc_propagate_attr(thread_t thread,struct thread_attr_for_ipc_propagation * attr)3709 thread_get_ipc_propagate_attr(thread_t thread, struct thread_attr_for_ipc_propagation *attr)
3710 {
3711 int iotier;
3712 int qos;
3713
3714 if (thread == NULL || attr == NULL) {
3715 return KERN_INVALID_ARGUMENT;
3716 }
3717
3718 iotier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
3719 qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
3720
3721 attr->tafip_iotier = iotier;
3722 attr->tafip_qos = qos;
3723
3724 return KERN_SUCCESS;
3725 }
3726
3727 /*
3728 * thread_set_allocation_name - .
3729 */
3730
/*
 * Swap the current thread's kernel allocation-name tag, returning the
 * previous one. The tag is only replaced when clearing (new_name == NULL)
 * or when no tag is currently set ("fifo": the first non-NULL name wins
 * and is kept until explicitly cleared).
 */
kern_allocation_name_t
thread_set_allocation_name(kern_allocation_name_t new_name)
{
	kern_allocation_name_t ret;
	thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
	ret = kstate->allocation_name;
	// fifo
	if (!new_name || !kstate->allocation_name) {
		kstate->allocation_name = new_name;
	}
	return ret;
}
3743
3744 void *
thread_iokit_tls_get(uint32_t index)3745 thread_iokit_tls_get(uint32_t index)
3746 {
3747 assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
3748 return current_thread()->saved.iokit.tls[index];
3749 }
3750
3751 void
thread_iokit_tls_set(uint32_t index,void * data)3752 thread_iokit_tls_set(uint32_t index, void * data)
3753 {
3754 assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
3755 current_thread()->saved.iokit.tls[index] = data;
3756 }
3757
/*
 * Duration of the thread's most recent wait: time it last became runnable
 * minus the time it last ran.
 */
uint64_t
thread_get_last_wait_duration(thread_t thread)
{
	return thread->last_made_runnable_time - thread->last_run_time;
}
3763
/* Return the thread's base scheduling priority. */
integer_t
thread_kern_get_pri(thread_t thr)
{
	return thr->base_pri;
}
3769
/* Set a kernel thread's priority via the scheduler. */
void
thread_kern_set_pri(thread_t thr, integer_t pri)
{
	sched_set_kernel_thread_priority(thr, pri);
}
3775
/* Return the maximum priority usable by kernel threads (MAXPRI_KERNEL). */
integer_t
thread_kern_get_kernel_maxpri(void)
{
	return MAXPRI_KERNEL;
}
3781 /*
3782 * thread_port_with_flavor_no_senders
3783 *
3784 * Called whenever the Mach port system detects no-senders on
3785 * the thread inspect or read port. These ports are allocated lazily and
3786 * should be deallocated here when there are no senders remaining.
3787 */
static void
thread_port_with_flavor_no_senders(
	ipc_port_t              port,
	mach_port_mscount_t     mscount __unused)
{
	thread_ro_t tro;
	thread_t thread;
	mach_thread_flavor_t flavor;
	ipc_kobject_type_t kotype;

	ip_mq_lock(port);
	/* Send rights were made since the notification fired: nothing to do yet */
	if (port->ip_srights > 0) {
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_THREAD_READ == kotype) || (IKOT_THREAD_INSPECT == kotype));
	thread = ipc_kobject_get_locked(port, kotype);
	if (thread != THREAD_NULL) {
		/* Keep the thread alive across the unlocked window below */
		thread_reference(thread);
	}
	ip_mq_unlock(port);

	if (thread == THREAD_NULL) {
		/* The thread is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	if (kotype == IKOT_THREAD_READ) {
		flavor = THREAD_FLAVOR_READ;
	} else {
		flavor = THREAD_FLAVOR_INSPECT;
	}

	/* Lock order: thread mutex before the port lock */
	thread_mtx_lock(thread);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_thread_terminate() ran
	 * and destroyed the kobject already. Just deallocate the task
	 * ref we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	tro = get_thread_ro(thread);
	if (!ip_active(port) ||
	    tro->tro_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		thread_mtx_unlock(thread);
		thread_deallocate(thread);
		return;
	}

	/* We won the race: clear the cached port and destroy it */
	assert(tro->tro_ports[flavor] == port);
	zalloc_ro_clear_field(ZONE_ID_THREAD_RO, tro, tro_ports[flavor]);
	thread_mtx_unlock(thread);

	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	thread_deallocate(thread);
}
3856
3857 /*
3858 * The 'thread_region_page_shift' is used by footprint
3859 * to specify the page size that it will use to
3860 * accomplish its accounting work on the task being
3861 * inspected. Since footprint uses a thread for each
3862 * task that it works on, we need to keep the page_shift
3863 * on a per-thread basis.
3864 */
3865
int
thread_self_region_page_shift(void)
{
	/*
	 * Return the page shift that this thread
	 * would like to use for its accounting work.
	 */
	return current_thread()->thread_region_page_shift;
}
3875
void
thread_self_region_page_shift_set(
	int pgshift)
{
	/*
	 * Set the page shift that this thread
	 * would like to use for its accounting work
	 * when dealing with a task.
	 */
	current_thread()->thread_region_page_shift = pgshift;
}
3887
__startup_func
static void
ctid_table_init(void)
{
	/*
	 * Pretend the early boot setup didn't exist,
	 * and pick a mangling nonce.
	 */
	/* Slot 0 is reserved/invalid: resolve it to THREAD_NULL */
	*compact_id_resolve(&ctid_table, 0) = THREAD_NULL;
	/* Random nonce used by ctid_mangle/ctid_unmangle below */
	ctid_nonce = (uint32_t)early_random() & CTID_MASK;
}
3899
3900
3901 /*
3902 * This maps the [0, CTID_MAX_THREAD_NUMBER] range
3903 * to [1, CTID_MAX_THREAD_NUMBER + 1 == CTID_MASK]
3904 * so that in mangled form, '0' is an invalid CTID.
3905 */
3906 static ctid_t
ctid_mangle(compact_id_t cid)3907 ctid_mangle(compact_id_t cid)
3908 {
3909 return (cid == ctid_nonce ? CTID_MASK : cid) ^ ctid_nonce;
3910 }
3911
3912 static compact_id_t
ctid_unmangle(ctid_t ctid)3913 ctid_unmangle(ctid_t ctid)
3914 {
3915 ctid ^= ctid_nonce;
3916 return ctid == CTID_MASK ? ctid_nonce : ctid;
3917 }
3918
/*
 * Allocate a compact id for the thread and store its mangled form in
 * thread->ctid.
 */
void
ctid_table_add(thread_t thread)
{
	compact_id_t cid;

	cid = compact_id_get(&ctid_table, CTID_MAX_THREAD_NUMBER, thread);
	thread->ctid = ctid_mangle(cid);
}
3927
/*
 * Return the thread's compact id to the table and clear thread->ctid.
 * Asserts the table slot actually pointed back at this thread.
 */
void
ctid_table_remove(thread_t thread)
{
	__assert_only thread_t value;

	value = compact_id_put(&ctid_table, ctid_unmangle(thread->ctid));
	assert3p(value, ==, thread);
	thread->ctid = 0;
}
3937
3938 thread_t
ctid_get_thread_unsafe(ctid_t ctid)3939 ctid_get_thread_unsafe(ctid_t ctid)
3940 {
3941 if (ctid) {
3942 return *compact_id_resolve(&ctid_table, ctid_unmangle(ctid));
3943 }
3944 return THREAD_NULL;
3945 }
3946
3947 thread_t
ctid_get_thread(ctid_t ctid)3948 ctid_get_thread(ctid_t ctid)
3949 {
3950 thread_t thread = THREAD_NULL;
3951
3952 if (ctid) {
3953 thread = *compact_id_resolve(&ctid_table, ctid_unmangle(ctid));
3954 assert(thread && thread->ctid == ctid);
3955 }
3956 return thread;
3957 }
3958
3959 ctid_t
thread_get_ctid(thread_t thread)3960 thread_get_ctid(thread_t thread)
3961 {
3962 return thread->ctid;
3963 }
3964
3965 /*
3966 * Adjust code signature dependent thread state.
3967 *
3968 * Called to allow code signature dependent adjustments to the thread
3969 * state. Note that this is usually called twice for the main thread:
3970 * Once at thread creation by thread_create, when the signature is
3971 * potentially not attached yet (which is usually the case for the
3972 * first/main thread of a task), and once after the task's signature
3973 * has actually been attached.
3974 *
3975 */
3976 kern_return_t
thread_process_signature(thread_t thread,task_t task)3977 thread_process_signature(thread_t thread, task_t task)
3978 {
3979 return machine_thread_process_signature(thread, task);
3980 }
3981
3982 #if CONFIG_DTRACE
3983 uint32_t
dtrace_get_thread_predcache(thread_t thread)3984 dtrace_get_thread_predcache(thread_t thread)
3985 {
3986 if (thread != THREAD_NULL) {
3987 return thread->t_dtrace_predcache;
3988 } else {
3989 return 0;
3990 }
3991 }
3992
3993 int64_t
dtrace_get_thread_vtime(thread_t thread)3994 dtrace_get_thread_vtime(thread_t thread)
3995 {
3996 if (thread != THREAD_NULL) {
3997 return thread->t_dtrace_vtime;
3998 } else {
3999 return 0;
4000 }
4001 }
4002
4003 int
dtrace_get_thread_last_cpu_id(thread_t thread)4004 dtrace_get_thread_last_cpu_id(thread_t thread)
4005 {
4006 if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
4007 return thread->last_processor->cpu_id;
4008 } else {
4009 return -1;
4010 }
4011 }
4012
4013 int64_t
dtrace_get_thread_tracing(thread_t thread)4014 dtrace_get_thread_tracing(thread_t thread)
4015 {
4016 if (thread != THREAD_NULL) {
4017 return thread->t_dtrace_tracing;
4018 } else {
4019 return 0;
4020 }
4021 }
4022
4023 uint16_t
dtrace_get_thread_inprobe(thread_t thread)4024 dtrace_get_thread_inprobe(thread_t thread)
4025 {
4026 if (thread != THREAD_NULL) {
4027 return thread->t_dtrace_inprobe;
4028 } else {
4029 return 0;
4030 }
4031 }
4032
4033 vm_offset_t
thread_get_kernel_stack(thread_t thread)4034 thread_get_kernel_stack(thread_t thread)
4035 {
4036 if (thread != THREAD_NULL) {
4037 return thread->kernel_stack;
4038 } else {
4039 return 0;
4040 }
4041 }
4042
4043 #if KASAN
4044 struct kasan_thread_data *
kasan_get_thread_data(thread_t thread)4045 kasan_get_thread_data(thread_t thread)
4046 {
4047 return &thread->kasan_data;
4048 }
4049 #endif
4050
4051 #if CONFIG_KCOV
4052 kcov_thread_data_t *
kcov_get_thread_data(thread_t thread)4053 kcov_get_thread_data(thread_t thread)
4054 {
4055 return &thread->kcov_data;
4056 }
4057 #endif
4058
4059 #if CONFIG_STKSZ
4060 /*
4061 * Returns base of a thread's kernel stack.
4062 *
4063 * Coverage sanitizer instruments every function including those that participates in stack handoff between threads.
4064 * There is a window in which CPU still holds old values but stack has been handed over to anoher thread already.
4065 * In this window kernel_stack is 0 but CPU still uses the original stack (until contex switch occurs). The original
4066 * kernel_stack value is preserved in ksancov_stack during this window.
4067 */
4068 vm_offset_t
kcov_stksz_get_thread_stkbase(thread_t thread)4069 kcov_stksz_get_thread_stkbase(thread_t thread)
4070 {
4071 if (thread != THREAD_NULL) {
4072 kcov_thread_data_t *data = kcov_get_thread_data(thread);
4073 if (data->ktd_stksz.kst_stack) {
4074 return data->ktd_stksz.kst_stack;
4075 } else {
4076 return thread->kernel_stack;
4077 }
4078 } else {
4079 return 0;
4080 }
4081 }
4082
4083 vm_offset_t
kcov_stksz_get_thread_stksize(thread_t thread)4084 kcov_stksz_get_thread_stksize(thread_t thread)
4085 {
4086 if (thread != THREAD_NULL) {
4087 return kernel_stack_size;
4088 } else {
4089 return 0;
4090 }
4091 }
4092
4093 void
kcov_stksz_set_thread_stack(thread_t thread,vm_offset_t stack)4094 kcov_stksz_set_thread_stack(thread_t thread, vm_offset_t stack)
4095 {
4096 kcov_thread_data_t *data = kcov_get_thread_data(thread);
4097 data->ktd_stksz.kst_stack = stack;
4098 }
4099 #endif /* CONFIG_STKSZ */
4100
4101 int64_t
dtrace_calc_thread_recent_vtime(thread_t thread)4102 dtrace_calc_thread_recent_vtime(thread_t thread)
4103 {
4104 if (thread == THREAD_NULL) {
4105 return 0;
4106 }
4107
4108 struct recount_usage usage = { 0 };
4109 recount_current_thread_usage(&usage);
4110 return (int64_t)(usage.ru_system_time_mach + usage.ru_user_time_mach);
4111 }
4112
4113 void
dtrace_set_thread_predcache(thread_t thread,uint32_t predcache)4114 dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
4115 {
4116 if (thread != THREAD_NULL) {
4117 thread->t_dtrace_predcache = predcache;
4118 }
4119 }
4120
4121 void
dtrace_set_thread_vtime(thread_t thread,int64_t vtime)4122 dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
4123 {
4124 if (thread != THREAD_NULL) {
4125 thread->t_dtrace_vtime = vtime;
4126 }
4127 }
4128
4129 void
dtrace_set_thread_tracing(thread_t thread,int64_t accum)4130 dtrace_set_thread_tracing(thread_t thread, int64_t accum)
4131 {
4132 if (thread != THREAD_NULL) {
4133 thread->t_dtrace_tracing = accum;
4134 }
4135 }
4136
4137 void
dtrace_set_thread_inprobe(thread_t thread,uint16_t inprobe)4138 dtrace_set_thread_inprobe(thread_t thread, uint16_t inprobe)
4139 {
4140 if (thread != THREAD_NULL) {
4141 thread->t_dtrace_inprobe = inprobe;
4142 }
4143 }
4144
/*
 * Fire the DTrace lifecycle probes for a newly started thread.
 *
 * Fires proc:::lwp-start for every thread; for the first (only)
 * thread of a task, additionally fires proc:::start and, when the
 * thread is flagged as having completed a successful exec, fires
 * proc:::exec-success and emits the BSD_PROC_EXEC kdebug event.
 * The order matters: exec-success/start fire before lwp-start.
 */
void
dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			/* Consume the one-shot flag set by dtrace_thread_didexec(). */
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
			KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
			    task_pid(task));
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}
4162
4163 void
dtrace_thread_didexec(thread_t thread)4164 dtrace_thread_didexec(thread_t thread)
4165 {
4166 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
4167 }
4168 #endif /* CONFIG_DTRACE */
4169