/*
 * Copyright (c) 2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/host_priv.h>
#include <mach/host_special_ports.h>
#include <mach/memory_error_notification.h>

#include <mach/mach_types.h>
#include <mach/host_info.h>
#include <kern/host.h>
#include <kern/locks.h>
#include <kern/ecc.h>
#include <kern/spl.h>
#include <kern/mpsc_queue.h>
#include <kern/thread.h>
#include <kern/startup.h>
#include <os/log.h>
#include <pexpert/pexpert.h>
#include <libkern/OSAtomic.h>
#include <arm/pmap_public.h>
#include <vm/vm_protos.h>

/* New CoreAnalytics ECC logging mechanism */

/*
 * These entry points are stubs in this configuration: the CoreAnalytics
 * ECC logging path is not implemented here, so each simply reports
 * failure to its caller.
 */
kern_return_t
ecc_log_memory_error(
    __unused pmap_paddr_t physical_address,
    __unused uint32_t ecc_flags)
{
    return KERN_FAILURE;
}

kern_return_t
ecc_log_memory_error_internal(
    __unused pmap_paddr_t physical_address,
    __unused uint32_t ecc_flags)
{
    return KERN_FAILURE;
}

kern_return_t
ecc_log_memory_error_ce(__unused pmap_paddr_t physical_address,
    __unused uint32_t ecc_flags,
    __unused uint32_t ce_count)
{
    return KERN_FAILURE;
}

/**
 * MCC Logging
 */

/**
 * TODO: rdar://97394997 (Clean up ECC / MCC logging)
 * We can probably clean some of this up and share some of the code with ECC.
 */
#if XNU_HANDLE_MCC

static struct mpsc_daemon_queue mcc_memory_error_event_queue;
struct _mcc_mem_err_event {
    struct mpsc_queue_chain link;
    mcc_ecc_event_t event;
};
typedef struct _mcc_mem_err_event* mcc_mem_err_event_t;

#define MCC_ECC_NUM_ERRORS (1024)
#define MCC_ERROR_EVENT_QUEUE_PRIORITY MAXPRI_USER
/*
 * Statically sized ring buffer of events; the producer can run in the
 * primary interrupt context, so events are never allocated dynamically.
 * One slot is always left unused so that producer == consumer can only
 * mean "empty".
 */
static struct _mcc_mem_err_event mcc_events[MCC_ECC_NUM_ERRORS];
static atomic_int mcc_events_producer_idx = 0;
static atomic_int mcc_events_consumer_idx = 0;
SCALABLE_COUNTER_DEFINE(mcc_dropped_events);
LCK_GRP_DECLARE(mcc_lock_grp, "mcc");
LCK_SPIN_DECLARE(mcc_lock, &mcc_lock_grp);

static inline int
mcc_events_next(int idx)
{
    assert(idx < MCC_ECC_NUM_ERRORS);
    return (idx + 1) % MCC_ECC_NUM_ERRORS;
}
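
/*
 * Worked example (illustrative only, not from the original source): with
 * MCC_ECC_NUM_ERRORS == 1024, the cursors behave as follows.
 *
 *   producer == consumer                  -> ring is empty
 *   mcc_events_next(producer) == consumer -> ring is "full" (1023 slots in
 *                                            use); the next event is dropped
 *                                            and mcc_dropped_events is bumped
 *   mcc_events_next(1023) == 0            -> wraparound
 */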

/* MCC ECC CoreAnalytics Error Logging */
static void
mcc_error_notify_user(mcc_ecc_event_t event)
{
    mach_port_t user_port = MACH_PORT_NULL;

    kern_return_t kr = host_get_memory_error_port(host_priv_self(), &user_port);

    if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
        os_log(OS_LOG_DEFAULT, "Failed to get memory error port");
        return;
    }

    mcc_memory_error_notification(user_port, event);

    ipc_port_release_send(user_port);
}

static void
mcc_memory_error_event_queue_invoke(mpsc_queue_chain_t e, mpsc_daemon_queue_t queue __unused)
{
    mcc_mem_err_event_t event;

    /* The consumer should never be invoked if there is nothing to consume. */
    int mcc_events_consumer_curr_idx = atomic_load(&mcc_events_consumer_idx);
    assert(mcc_events_consumer_curr_idx != atomic_load(&mcc_events_producer_idx));

    event = mpsc_queue_element(e, struct _mcc_mem_err_event, link);
    mcc_error_notify_user(event->event);
    int mcc_events_consumer_next_idx = mcc_events_next(mcc_events_consumer_curr_idx);
    atomic_store(&mcc_events_consumer_idx, mcc_events_consumer_next_idx);
}

static mcc_mem_err_event_t
mcc_memory_error_create_event(mcc_ecc_event_t mcc_event)
{
    mcc_mem_err_event_t ret = NULL;

    /**
     * @note We are unable to dynamically allocate events, because this function can be called from
     * the primary interrupt context. Instead, we allocate from a statically sized ring buffer.
     */
    const boolean_t interrupts_enabled = ml_set_interrupts_enabled(FALSE);
    lck_spin_lock(&mcc_lock);
    int mcc_events_producer_curr_idx = atomic_load(&mcc_events_producer_idx);
    int mcc_events_producer_next_idx = mcc_events_next(mcc_events_producer_curr_idx);
    if (mcc_events_producer_next_idx == atomic_load(&mcc_events_consumer_idx)) {
        /**
         * The consumer is running behind the producer, and we're in the primary interrupt context.
         * Drop this event and return NULL to the caller.
         */
        counter_inc(&mcc_dropped_events);
        ret = NULL;
        goto done;
    }

    mcc_mem_err_event_t event = &mcc_events[mcc_events_producer_curr_idx];
    event->event = mcc_event;
    atomic_store(&mcc_events_producer_idx, mcc_events_producer_next_idx);
    ret = event;

done:
    lck_spin_unlock(&mcc_lock);
    ml_set_interrupts_enabled(interrupts_enabled);
    return ret;
}
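
/*
 * Sketch of the locking pattern used above (explanatory, assuming the
 * usual rule that a spin lock shared with interrupt context must be
 * taken with local interrupts masked):
 *
 *   s = ml_set_interrupts_enabled(FALSE);  // mask local interrupts
 *   lck_spin_lock(&mcc_lock);              // safe vs. interrupt producers
 *   ...advance ring cursors...
 *   lck_spin_unlock(&mcc_lock);
 *   ml_set_interrupts_enabled(s);          // restore the previous state
 *
 * Without the masking, an MCC error interrupt arriving on this CPU could
 * try to take mcc_lock while it is already held and deadlock the core.
 */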

__startup_func
static void
mcc_logging_init(void)
{
    mpsc_daemon_queue_init_with_thread(&mcc_memory_error_event_queue,
        mcc_memory_error_event_queue_invoke, MCC_ERROR_EVENT_QUEUE_PRIORITY,
        "daemon.mcc_error-events", MPSC_DAEMON_INIT_INACTIVE);

    mpsc_daemon_queue_activate(&mcc_memory_error_event_queue);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, mcc_logging_init);

#endif /* XNU_HANDLE_MCC */

kern_return_t
mcc_log_memory_error(mcc_ecc_event_t mcc_event __unused)
{
#if XNU_HANDLE_MCC
    mcc_mem_err_event_t event = mcc_memory_error_create_event(mcc_event);
    if (event == NULL) {
        return KERN_RESOURCE_SHORTAGE;
    }
    assert(mcc_memory_error_event_queue.mpd_thread != NULL);
    mpsc_daemon_enqueue(&mcc_memory_error_event_queue,
        &event->link, MPSC_QUEUE_DISABLE_PREEMPTION);
    return KERN_SUCCESS;
#else
    return KERN_FAILURE;
#endif
}
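
/*
 * Usage sketch (hypothetical caller, not part of this file): a platform
 * handler that has decoded an MCC ECC error would hand it off like this,
 * treating KERN_RESOURCE_SHORTAGE as a dropped event:
 *
 *   mcc_ecc_event_t ev = {.version = MCC_ECC_V1, .status = status_reg};
 *   if (mcc_log_memory_error(ev) != KERN_SUCCESS) {
 *       // ring was full, or MCC handling is compiled out
 *   }
 */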

#if (DEBUG || DEVELOPMENT)
static int
mcc_memory_error_notify_test_run(int64_t in, int64_t *out)
{
    printf("Running mcc_memory_error_notify_test for %lld iterations\n", in);
    for (int64_t i = 0; i < in; i++) {
        mcc_ecc_event_t event = {.version = MCC_ECC_V1, .status = (uint32_t)i};
        /**
         * To accurately test mcc_log_memory_error, we must disable preemption, because it is called
         * from the primary interrupt context.
         */
        disable_preemption();
        mcc_log_memory_error(event);
        enable_preemption();
    }

    *out = 1;
    return 0;
}

SYSCTL_TEST_REGISTER(mcc_memory_error_notify_test, mcc_memory_error_notify_test_run);
#endif /* (DEBUG || DEVELOPMENT) */
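
/*
 * On DEBUG/DEVELOPMENT kernels the test above can be driven from user
 * space; SYSCTL_TEST_REGISTER is assumed to publish it under the
 * debug.test sysctl namespace, e.g.:
 *
 *   sysctl debug.test.mcc_memory_error_notify_test=1000
 */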


/* Legacy ECC logging mechanism */

/*
 * ECC data. Not really KPCs, but this still seems like the
 * best home for this code.
 *
 * Circular buffer of events. When we fill up, drop data.
 */
#define ECC_EVENT_BUFFER_COUNT (256)

struct ecc_event ecc_data[ECC_EVENT_BUFFER_COUNT];
static uint32_t ecc_data_next_read;
static uint32_t ecc_data_next_write;
static boolean_t ecc_data_empty = TRUE; // disambiguates next read == next write: empty vs. full
static LCK_GRP_DECLARE(ecc_data_lock_group, "ecc-data");
static LCK_SPIN_DECLARE(ecc_data_lock, &ecc_data_lock_group);
static uint32_t ecc_correction_count;


uint32_t
ecc_log_get_correction_count(void)
{
    return ecc_correction_count;
}

kern_return_t
ecc_log_record_event(const struct ecc_event *ev)
{
    spl_t x;

    if (ev->count > ECC_EVENT_INFO_DATA_ENTRIES) {
        panic("Count of %u on ecc event is too large.", (unsigned)ev->count);
    }

    x = splhigh();
    lck_spin_lock(&ecc_data_lock);

    ecc_correction_count++;

    if (ecc_data_next_read == ecc_data_next_write && !ecc_data_empty) {
        /* Buffer is full: drop this event. */
        lck_spin_unlock(&ecc_data_lock);
        splx(x);
        return KERN_FAILURE;
    }

    bcopy(ev, &ecc_data[ecc_data_next_write], sizeof(*ev));
    ecc_data_next_write++;
    ecc_data_next_write %= ECC_EVENT_BUFFER_COUNT;
    ecc_data_empty = FALSE;

    lck_spin_unlock(&ecc_data_lock);
    splx(x);

    return KERN_SUCCESS;
}
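
/*
 * Usage sketch (hypothetical producer, not part of this file): a platform
 * ECC handler fills in an event and records it; KERN_FAILURE means the
 * circular buffer was full and the event was dropped, though
 * ecc_correction_count still advances:
 *
 *   struct ecc_event ev = { .count = 0 };  // plus platform-specific fields
 *   (void)ecc_log_record_event(&ev);
 */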

kern_return_t
ecc_log_get_next_event(struct ecc_event *ev)
{
    spl_t x;

    x = splhigh();
    lck_spin_lock(&ecc_data_lock);

    if (ecc_data_empty) {
        assert(ecc_data_next_write == ecc_data_next_read);

        lck_spin_unlock(&ecc_data_lock);
        splx(x);
        return KERN_FAILURE;
    }

    bcopy(&ecc_data[ecc_data_next_read], ev, sizeof(*ev));
    ecc_data_next_read++;
    ecc_data_next_read %= ECC_EVENT_BUFFER_COUNT;

    if (ecc_data_next_read == ecc_data_next_write) {
        ecc_data_empty = TRUE;
    }

    lck_spin_unlock(&ecc_data_lock);
    splx(x);

    return KERN_SUCCESS;
}
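
/*
 * Usage sketch (hypothetical consumer, not part of this file): a polling
 * client drains the buffer until it reports empty:
 *
 *   struct ecc_event ev;
 *   while (ecc_log_get_next_event(&ev) == KERN_SUCCESS) {
 *       // process ev; KERN_FAILURE means the buffer is empty
 *   }
 */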