1 /* 2 * Copyright (c) 2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29 #pragma once 30 31 #include <mach/kern_return.h> 32 #include <stdint.h> 33 #include <sys/cdefs.h> 34 #include <mach/vm_types.h> 35 36 __BEGIN_DECLS 37 38 #ifdef XNU_KERNEL_PRIVATE 39 extern ppnum_t *ecc_bad_pages; 40 extern uint32_t ecc_bad_pages_count; 41 42 /* Counts for sysctls*/ 43 extern uint32_t vm_ecc_db_pages_count; 44 extern uint32_t vm_ecc_zero_pages_count; 45 extern uint32_t vm_ecc_panic_pages_count; 46 extern uint32_t vm_ecc_max_db_pages; 47 #endif 48 49 /* Old ECC logging mechanism */ 50 51 #define ECC_EVENT_INFO_DATA_ENTRIES 8 52 struct ecc_event { 53 uint8_t id; // ID of memory (e.g. L2C), platform-specific 54 uint8_t count; // Of uint64_t's used, starting at index 0 55 uint64_t data[ECC_EVENT_INFO_DATA_ENTRIES] __attribute__((aligned(8))); // Event-specific data 56 }; 57 58 #ifdef KERNEL_PRIVATE 59 extern kern_return_t ecc_log_record_event(const struct ecc_event *ev); 60 #endif 61 62 #ifdef XNU_KERNEL_PRIVATE 63 #include <mach/vm_param.h> 64 65 #define ECC_PANIC_PAGE_MAGIC 0xEC 66 #define ECC_PANIC_PAGE_SIGN ((1ULL << 63) | (ECC_PANIC_PAGE_MAGIC)) 67 #define ECC_PANIC_PAGE_MASK ((1ULL << 63) | (PAGE_MASK)) 68 extern kern_return_t ecc_log_get_next_event(struct ecc_event *ev); 69 extern uint32_t ecc_log_get_correction_count(void); 70 #endif 71 72 #define ECC_TESTING (DEVELOPMENT || DEBUG) 73 74 /* New CoreAnalytics ECC logging mechanism */ 75 76 #define VM_ECC_PAGE_POISON_GRANULE_SHIFT (7) 77 #define VM_ECC_PAGE_POISON_GRANULE (1 << VM_ECC_PAGE_POISON_GRANULE_SHIFT) 78 79 /* Flags to describe ECC memory errors */ 80 __options_decl(ecc_flags_t, uint32_t, { 81 ECC_NONE = 0x00000000, 82 /* An error is correctable (1) or uncorrectable (0). */ 83 ECC_IS_CORRECTABLE = 0x00000001, 84 /* The database is corrupt. */ 85 ECC_DB_CORRUPTED = 0x00000002, 86 /* The error was injected for testing purposes. */ 87 ECC_IS_TEST_ERROR = 0x00000004, 88 /* Do not trigger a CA report, just record to the DB (for testing purposes) */ 89 ECC_DB_ONLY = 0x00000008, 90 /* Filter out the given address from the DB*/ 91 ECC_REMOVE_ADDR = 0x00000010 92 }); 93 94 /** 95 * ECC versions. 96 */ 97 __options_decl(ecc_version_t, uint32_t, { 98 ECC_V1, 99 100 // Metadata 101 ECC_NUM_VERSIONS 102 }); 103 104 /** 105 * ECC event descriptor. 106 * 107 * @note If a new ECC version has been added (e.g. future hardware must 108 * log new or different data) new fields should be appended to this struct to 109 * represent the new data. No fields should be deleted from this struct unless 110 * the field corresponds only to hardware that has been deprecated. 111 */ 112 typedef struct { 113 /* Version of this struct. */ 114 ecc_version_t version; 115 /* Flags describing the reported error. */ 116 ecc_flags_t flags; 117 /* Physical address of failure */ 118 uint64_t physaddr; 119 /* Number of CEs reported at physaddr */ 120 uint32_t ce_count; 121 /* Vendor ID */ 122 uint32_t vendor; 123 /* Reserved for future extension to report row, column, bank, etc. */ 124 uint32_t reserved[4]; 125 } ecc_event_t; 126 _Static_assert(sizeof(ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs"); 127 128 /** 129 * platform_error_handler_ecc_poll_t is the type of callback registered by the 130 * platform error handler that xnu can use to poll for ECC data. 131 */ 132 typedef int (*platform_error_handler_ecc_poll_t)(uint64_t *addrs, uint32_t *error_count); 133 kern_return_t kern_ecc_poll_register(platform_error_handler_ecc_poll_t poll_func, uint32_t max_errors); 134 135 /* Flags to describe MCC memory errors */ 136 __options_decl(mcc_flags_t, uint32_t, { 137 MCC_NONE = 0x00000000, 138 MCC_IS_SINGLE_BIT = 0x00000001, 139 MCC_IS_MULTI_BIT = 0x00000002, 140 }); 141 142 /** 143 * MCC ECC versions. 144 */ 145 typedef enum { 146 MCC_ECC_V1, 147 148 // Metadata 149 MCC_ECC_NUM_VERSIONS 150 } mcc_ecc_version_t; 151 152 /** 153 * MCC ECC event descriptor. 154 * 155 * @note If a new MCC ECC version has been added, because i.e. future hardware must log new or different data, 156 * new fields should be appended to this struct to represent the new data. No fields should be 157 * deleted from this struct unless the field corresponds only to hardware that has been deprecated. 158 */ 159 typedef struct { 160 /* Version of this struct. */ 161 mcc_ecc_version_t version; 162 /* Flags used to describe the error. */ 163 mcc_flags_t flags; 164 /* Interrupt status at the time of the MCC error. */ 165 uint32_t status; 166 /* AMCC on which the error occurred. */ 167 uint32_t amcc; 168 /* Plane of the AMCC on which the error occurred. */ 169 uint32_t plane; 170 /* MemCache error Bank of first one bit error. */ 171 uint32_t bank; 172 /* MemCache error Way of first one bit error. */ 173 uint32_t way; 174 /* MemCache error Index of first one bit error. */ 175 uint32_t index; 176 /* Indicates whether the error is in upper half cache line or lower half cache line. */ 177 uint32_t bit_off_cl; 178 /* MemCache one bit error bit offset of first one bit error with in half cache line. */ 179 uint32_t bit_off_within_hcl; 180 } mcc_ecc_event_t; 181 _Static_assert(sizeof(mcc_ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs"); 182 183 #if KERNEL_PRIVATE 184 185 /** 186 * Logs any memory error. 187 * 188 * This will notify mmaintenanced of the error. The error 189 * will get added to a database of errors and sent to 190 * CoreAnalytics. If ECC_IS_CORRECTABLE == 0, 191 * the address will be added to dramecc.db and will 192 * be retired for the lifetime of the device. 193 * 194 * If it is too early in boot to send a notification directly 195 * to the deamon, the error will be added to an array to be serviced 196 * later by an mpsc_daemon_queue. 197 * 198 * If ECC_IS_CORRECTABLE flag is set with this function, it 199 * assumes one error. If caller wishes to report the CE count 200 * reported by hardware, use ecc_log_memory_error_ce(). 201 * 202 * @param physical_address address that the error occured on 203 * @param ecc_flags flags used to describe the error 204 * 205 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not 206 */ 207 extern kern_return_t ecc_log_memory_error(uint64_t physical_address, ecc_flags_t ecc_flags); 208 extern kern_return_t ecc_log_memory_error_internal(uint64_t physical_address, ecc_flags_t ecc_flags); 209 210 /* 211 * Used to report delayed errors, scraped after ECC is enabled. 212 */ 213 extern kern_return_t ecc_log_memory_error_delayed(uint64_t physical_address, ecc_flags_t ecc_flags); 214 215 /** 216 * Logs a correctable memory error. 217 * 218 * ECC_IS_CORRECTABLE is implied. Including this flag or not 219 * makes no difference for this function. 220 * 221 * @param physical_address address that the error occured on 222 * @param ecc_flags flags used to describe the error 223 * @param ce_count number of CEs occured on this page reported by HW 224 * 225 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not 226 */ 227 kern_return_t ecc_log_memory_error_ce(uint64_t physical_address, ecc_flags_t ecc_flags, uint32_t ce_count); 228 229 /** 230 * Logs an MCC error. 231 * 232 * @param event Event to be logged 233 * @returns KERN_SUCCESS on success, KERN_FAILURE otherwise 234 */ 235 kern_return_t 236 mcc_log_memory_error(mcc_ecc_event_t event); 237 238 #endif /* KERNEL_PRIVATE */ 239 240 __END_DECLS 241