1 /* 2 * Copyright (c) 2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29 #pragma once 30 31 #include <mach/kern_return.h> 32 #include <stdint.h> 33 #include <sys/cdefs.h> 34 #include <mach/vm_types.h> 35 36 __BEGIN_DECLS 37 38 #ifdef XNU_KERNEL_PRIVATE 39 extern ppnum_t *ecc_bad_pages; 40 extern uint32_t ecc_bad_pages_count; 41 42 /* Counts for sysctls*/ 43 extern uint32_t vm_ecc_db_pages_count; 44 extern uint32_t vm_ecc_zero_pages_count; 45 extern uint32_t vm_ecc_panic_pages_count; 46 extern uint32_t vm_ecc_max_db_pages; 47 #endif 48 49 /* Old ECC logging mechanism */ 50 51 #define ECC_EVENT_INFO_DATA_ENTRIES 8 52 struct ecc_event { 53 uint8_t id; // ID of memory (e.g. L2C), platform-specific 54 uint8_t count; // Of uint64_t's used, starting at index 0 55 uint64_t data[ECC_EVENT_INFO_DATA_ENTRIES] __attribute__((aligned(8))); // Event-specific data 56 }; 57 58 #ifdef KERNEL_PRIVATE 59 extern kern_return_t ecc_log_record_event(const struct ecc_event *ev); 60 #endif 61 62 #ifdef XNU_KERNEL_PRIVATE 63 extern kern_return_t ecc_log_get_next_event(struct ecc_event *ev); 64 extern uint32_t ecc_log_get_correction_count(void); 65 #endif 66 67 #define ECC_TESTING (DEVELOPMENT || DEBUG) 68 69 /* New CoreAnalytics ECC logging mechanism */ 70 71 #define VM_ECC_PAGE_POISON_GRANULE_SHIFT (7) 72 #define VM_ECC_PAGE_POISON_GRANULE (1 << VM_ECC_PAGE_POISON_GRANULE_SHIFT) 73 74 /* Flags to describe ECC memory errors */ 75 __options_decl(ecc_flags_t, uint32_t, { 76 ECC_NONE = 0x00000000, 77 /* An error is correctable (1) or uncorrectable (0). */ 78 ECC_IS_CORRECTABLE = 0x00000001, 79 /* The database is corrupt. */ 80 ECC_DB_CORRUPTED = 0x00000002, 81 /* The error was injected for testing purposes. */ 82 ECC_IS_TEST_ERROR = 0x00000004, 83 /* Do not trigger a CA report, just record to the DB (for testing purposes) */ 84 ECC_DB_ONLY = 0x00000008, 85 }); 86 87 /** 88 * ECC versions. 89 */ 90 __options_decl(ecc_version_t, uint32_t, { 91 ECC_V1, 92 93 // Metadata 94 ECC_NUM_VERSIONS 95 }); 96 97 /** 98 * ECC event descriptor. 99 * 100 * @note If a new ECC version has been added (e.g. future hardware must 101 * log new or different data) new fields should be appended to this struct to 102 * represent the new data. No fields should be deleted from this struct unless 103 * the field corresponds only to hardware that has been deprecated. 104 */ 105 typedef struct { 106 /* Version of this struct. */ 107 ecc_version_t version; 108 /* Flags describing the reported error. */ 109 ecc_flags_t flags; 110 /* Physical address of failure */ 111 uint64_t physaddr; 112 /* Number of CEs reported at physaddr */ 113 uint32_t ce_count; 114 /* Vendor ID */ 115 uint32_t vendor; 116 /* Reserved for future extension to report row, column, bank, etc. */ 117 uint32_t reserved[4]; 118 } ecc_event_t; 119 _Static_assert(sizeof(ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs"); 120 121 /** 122 * platform_error_handler_ecc_poll_t is the type of callback registered by the 123 * platform error handler that xnu can use to poll for ECC data. 124 */ 125 typedef int (*platform_error_handler_ecc_poll_t)(uint64_t *addrs, uint32_t *error_count); 126 kern_return_t kern_ecc_poll_register(platform_error_handler_ecc_poll_t poll_func, uint32_t max_errors); 127 128 /* Flags to describe MCC memory errors */ 129 __options_decl(mcc_flags_t, uint32_t, { 130 MCC_NONE = 0x00000000, 131 MCC_IS_SINGLE_BIT = 0x00000001, 132 MCC_IS_MULTI_BIT = 0x00000002, 133 }); 134 135 /** 136 * MCC ECC versions. 137 */ 138 typedef enum { 139 MCC_ECC_V1, 140 141 // Metadata 142 MCC_ECC_NUM_VERSIONS 143 } mcc_ecc_version_t; 144 145 /** 146 * MCC ECC event descriptor. 147 * 148 * @note If a new MCC ECC version has been added, because i.e. future hardware must log new or different data, 149 * new fields should be appended to this struct to represent the new data. No fields should be 150 * deleted from this struct unless the field corresponds only to hardware that has been deprecated. 151 */ 152 typedef struct { 153 /* Version of this struct. */ 154 mcc_ecc_version_t version; 155 /* Flags used to describe the error. */ 156 mcc_flags_t flags; 157 /* Interrupt status at the time of the MCC error. */ 158 uint32_t status; 159 /* AMCC on which the error occurred. */ 160 uint32_t amcc; 161 /* Plane of the AMCC on which the error occurred. */ 162 uint32_t plane; 163 /* MemCache error Bank of first one bit error. */ 164 uint32_t bank; 165 /* MemCache error Way of first one bit error. */ 166 uint32_t way; 167 /* MemCache error Index of first one bit error. */ 168 uint32_t index; 169 /* Indicates whether the error is in upper half cache line or lower half cache line. */ 170 uint32_t bit_off_cl; 171 /* MemCache one bit error bit offset of first one bit error with in half cache line. */ 172 uint32_t bit_off_within_hcl; 173 } mcc_ecc_event_t; 174 _Static_assert(sizeof(mcc_ecc_event_t) == 10 * sizeof(uint32_t), "ecc_event_t size must be updated in memory_error_notification.defs"); 175 176 #if KERNEL_PRIVATE 177 178 /** 179 * Logs any memory error. 180 * 181 * This will notify mmaintenanced of the error. The error 182 * will get added to a database of errors and sent to 183 * CoreAnalytics. If ECC_IS_CORRECTABLE == 0, 184 * the address will be added to dramecc.db and will 185 * be retired for the lifetime of the device. 186 * 187 * If it is too early in boot to send a notification directly 188 * to the deamon, the error will be added to an array to be serviced 189 * later by an mpsc_daemon_queue. 190 * 191 * If ECC_IS_CORRECTABLE flag is set with this function, it 192 * assumes one error. If caller wishes to report the CE count 193 * reported by hardware, use ecc_log_memory_error_ce(). 194 * 195 * @param physical_address address that the error occured on 196 * @param ecc_flags flags used to describe the error 197 * 198 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not 199 */ 200 extern kern_return_t ecc_log_memory_error(uint64_t physical_address, ecc_flags_t ecc_flags); 201 extern kern_return_t ecc_log_memory_error_internal(uint64_t physical_address, ecc_flags_t ecc_flags); 202 203 /* 204 * Used to report delayed errors, scraped after ECC is enabled. 205 */ 206 extern kern_return_t ecc_log_memory_error_delayed(uint64_t physical_address, ecc_flags_t ecc_flags); 207 208 /** 209 * Logs a correctable memory error. 210 * 211 * ECC_IS_CORRECTABLE is implied. Including this flag or not 212 * makes no difference for this function. 213 * 214 * @param physical_address address that the error occured on 215 * @param ecc_flags flags used to describe the error 216 * @param ce_count number of CEs occured on this page reported by HW 217 * 218 * @returns KERN_SUCCESS if logging supported by hw, KERN_FAILURE if not 219 */ 220 kern_return_t ecc_log_memory_error_ce(uint64_t physical_address, ecc_flags_t ecc_flags, uint32_t ce_count); 221 222 /** 223 * Logs an MCC error. 224 * 225 * @param event Event to be logged 226 * @returns KERN_SUCCESS on success, KERN_FAILURE otherwise 227 */ 228 kern_return_t 229 mcc_log_memory_error(mcc_ecc_event_t event); 230 231 #endif /* KERNEL_PRIVATE */ 232 233 __END_DECLS 234