1 /* 2 * Copyright (c) 2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29 #pragma once 30 31 #include <mach/kern_return.h> 32 #include <stdint.h> 33 #include <sys/cdefs.h> 34 #include <mach/vm_types.h> 35 36 __BEGIN_DECLS 37 38 #ifdef XNU_KERNEL_PRIVATE 39 extern ppnum_t *ecc_bad_pages; 40 extern uint32_t ecc_bad_pages_count; 41 42 /* Counts for sysctls*/ 43 extern uint32_t vm_ecc_db_pages_count; 44 extern uint32_t vm_ecc_zero_pages_count; 45 extern uint32_t vm_ecc_panic_pages_count; 46 extern uint32_t vm_ecc_max_db_pages; 47 #endif 48 49 /* Old ECC logging mechanism */ 50 51 #define ECC_EVENT_INFO_DATA_ENTRIES 8 52 struct ecc_event { 53 uint8_t id; // ID of memory (e.g. L2C), platform-specific 54 uint8_t count; // Of uint64_t's used, starting at index 0 55 uint64_t data[ECC_EVENT_INFO_DATA_ENTRIES] __attribute__((aligned(8))); // Event-specific data 56 }; 57 58 #ifdef KERNEL_PRIVATE 59 extern kern_return_t ecc_log_record_event(const struct ecc_event *ev); 60 #endif 61 62 #ifdef XNU_KERNEL_PRIVATE 63 #include <mach/vm_param.h> 64 65 #define ECC_PANIC_PAGE_MAGIC 0xEC 66 #define ECC_PANIC_PAGE_SIGN ((1ULL << 63) | (ECC_PANIC_PAGE_MAGIC)) 67 #define ECC_PANIC_PAGE_MASK ((1ULL << 63) | (PAGE_MASK)) 68 extern kern_return_t ecc_log_get_next_event(struct ecc_event *ev); 69 extern uint32_t ecc_log_get_correction_count(void); 70 #endif 71 72 #define ECC_TESTING (DEVELOPMENT || DEBUG) 73 74 /* New CoreAnalytics ECC logging mechanism */ 75 76 #define VM_ECC_PAGE_POISON_GRANULE_SHIFT (7) 77 #define VM_ECC_PAGE_POISON_GRANULE (1 << VM_ECC_PAGE_POISON_GRANULE_SHIFT) 78 79 /* Flags to describe ECC memory errors */ 80 __options_decl(ecc_flags_t, uint32_t, { 81 ECC_NONE = 0x00000000, 82 /* An error is correctable (1) or uncorrectable (0). */ 83 ECC_IS_CORRECTABLE = 0x00000001, 84 /* The database is corrupt. */ 85 ECC_DB_CORRUPTED = 0x00000002, 86 /* The error was injected for testing purposes. */ 87 ECC_IS_TEST_ERROR = 0x00000004, 88 /* Do not trigger a CA report, just record to the DB (for testing purposes) */ 89 ECC_DB_ONLY = 0x00000008, 90 }); 91 92 /** 93 * ECC versions. 94 */ 95 __options_decl(ecc_version_t, uint32_t, { 96 ECC_V1, 97 98 // Metadata 99 ECC_NUM_VERSIONS 100 }); 101 102 /** 103 * ECC event descriptor. 104 * 105 * @note If a new ECC version has been added (e.g. future hardware must 106 * log new or different data) new fields should be appended to this struct to 107 * represent the new data. No fields should be deleted from this struct unless 108 * the field corresponds only to hardware that has been deprecated. 109 */ 110 typedef struct { 111 /* Version of this struct. */ 112 ecc_version_t version; 113 /* Flags describing the reported error. */ 114 ecc_flags_t flags; 115 /* Physical address of failure */ 116 uint64_t physaddr; 117 /* Number of CEs reported at physaddr */ 118 uint32_t ce_count; 119 /* Vendor ID */ 120 uint32_t vendor; 121 /* Reserved for future extension to report row, column, bank, etc. */ 122 uint32_t reserved[4]; 123 } ecc_event_t; 124 _Static_assert(sizeof(ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs"); 125 126 /** 127 * platform_error_handler_ecc_poll_t is the type of callback registered by the 128 * platform error handler that xnu can use to poll for ECC data. 129 */ 130 typedef int (*platform_error_handler_ecc_poll_t)(uint64_t *addrs, uint32_t *error_count); 131 kern_return_t kern_ecc_poll_register(platform_error_handler_ecc_poll_t poll_func, uint32_t max_errors); 132 133 /* Flags to describe MCC memory errors */ 134 __options_decl(mcc_flags_t, uint32_t, { 135 MCC_NONE = 0x00000000, 136 MCC_IS_SINGLE_BIT = 0x00000001, 137 MCC_IS_MULTI_BIT = 0x00000002, 138 }); 139 140 /** 141 * MCC ECC versions. 142 */ 143 typedef enum { 144 MCC_ECC_V1, 145 146 // Metadata 147 MCC_ECC_NUM_VERSIONS 148 } mcc_ecc_version_t; 149 150 /** 151 * MCC ECC event descriptor. 152 * 153 * @note If a new MCC ECC version has been added, because i.e. future hardware must log new or different data, 154 * new fields should be appended to this struct to represent the new data. No fields should be 155 * deleted from this struct unless the field corresponds only to hardware that has been deprecated. 156 */ 157 typedef struct { 158 /* Version of this struct. */ 159 mcc_ecc_version_t version; 160 /* Flags used to describe the error. */ 161 mcc_flags_t flags; 162 /* Interrupt status at the time of the MCC error. */ 163 uint32_t status; 164 /* AMCC on which the error occurred. */ 165 uint32_t amcc; 166 /* Plane of the AMCC on which the error occurred. */ 167 uint32_t plane; 168 /* MemCache error Bank of first one bit error. */ 169 uint32_t bank; 170 /* MemCache error Way of first one bit error. */ 171 uint32_t way; 172 /* MemCache error Index of first one bit error. */ 173 uint32_t index; 174 /* Indicates whether the error is in upper half cache line or lower half cache line. */ 175 uint32_t bit_off_cl; 176 /* MemCache one bit error bit offset of first one bit error with in half cache line. */ 177 uint32_t bit_off_within_hcl; 178 } mcc_ecc_event_t; 179 _Static_assert(sizeof(mcc_ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs"); 180 181 #if KERNEL_PRIVATE 182 183 /** 184 * Logs any memory error. 185 * 186 * This will notify mmaintenanced of the error. The error 187 * will get added to a database of errors and sent to 188 * CoreAnalytics. If ECC_IS_CORRECTABLE == 0, 189 * the address will be added to dramecc.db and will 190 * be retired for the lifetime of the device. 191 * 192 * If it is too early in boot to send a notification directly 193 * to the deamon, the error will be added to an array to be serviced 194 * later by an mpsc_daemon_queue. 195 * 196 * If ECC_IS_CORRECTABLE flag is set with this function, it 197 * assumes one error. If caller wishes to report the CE count 198 * reported by hardware, use ecc_log_memory_error_ce(). 199 * 200 * @param physical_address address that the error occured on 201 * @param ecc_flags flags used to describe the error 202 * 203 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not 204 */ 205 extern kern_return_t ecc_log_memory_error(uint64_t physical_address, ecc_flags_t ecc_flags); 206 extern kern_return_t ecc_log_memory_error_internal(uint64_t physical_address, ecc_flags_t ecc_flags); 207 208 /* 209 * Used to report delayed errors, scraped after ECC is enabled. 210 */ 211 extern kern_return_t ecc_log_memory_error_delayed(uint64_t physical_address, ecc_flags_t ecc_flags); 212 213 /** 214 * Logs a correctable memory error. 215 * 216 * ECC_IS_CORRECTABLE is implied. Including this flag or not 217 * makes no difference for this function. 218 * 219 * @param physical_address address that the error occured on 220 * @param ecc_flags flags used to describe the error 221 * @param ce_count number of CEs occured on this page reported by HW 222 * 223 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not 224 */ 225 kern_return_t ecc_log_memory_error_ce(uint64_t physical_address, ecc_flags_t ecc_flags, uint32_t ce_count); 226 227 /** 228 * Logs an MCC error. 229 * 230 * @param event Event to be logged 231 * @returns KERN_SUCCESS on success, KERN_FAILURE otherwise 232 */ 233 kern_return_t 234 mcc_log_memory_error(mcc_ecc_event_t event); 235 236 #endif /* KERNEL_PRIVATE */ 237 238 __END_DECLS 239