1 /* 2 * Copyright (c) 2021 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29 #ifndef __COMPRESSION_H 30 #define __COMPRESSION_H 31 32 #include <stdint.h> 33 #include <stddef.h> 34 #include <os/base.h> 35 36 /*! 37 * @enum compression_algorithm_t 38 * @abstract Tag used to select a compression algorithm. 39 * @discussion Further details on the supported formats, and their implementation: 40 * 41 * - LZ4 is an extremely high-performance compressor. The open source version 42 * is already one of the fastest compressors of which we are aware, and we 43 * have optimized it still further in our implementation. The encoded format 44 * we produce and consume is compatible with the open source version, except 45 * that we add a very simple frame to the raw stream to allow some additional 46 * validation and functionality. 47 * 48 * The frame is documented here so that you can easily wrap another LZ4 49 * encoder/decoder to produce/consume the same data stream if necessary. An 50 * LZ4 encoded buffer is a sequence of blocks, each of which begins with a 51 * header. There are three possible headers: 52 * 53 * a "compressed block header" is (hex) 62 76 34 31, followed by the 54 * size in bytes of the decoded (plaintext) data represented by the 55 * block and the size (in bytes) of the encoded data stored in the 56 * block. Both size fields are stored as (possibly unaligned) 32-bit 57 * little-endian values. The compressed block header is followed 58 * immediately by the actual lz4-encoded data stream. 59 * 60 * an "uncompressed block header" is (hex) 62 76 34 2d, followed by the 61 * size of the data stored in the uncompressed block as a (possibly 62 * unaligned) 32-bit little-endian value. The uncompressed block header 63 * is followed immediately by the uncompressed data buffer of the 64 * specified size. 65 * 66 * an "end of stream header" is (hex) 62 76 34 24, and marks the end 67 * of the lz4 frame. No further data may be written or read beyond 68 * this header. 69 * 70 * - SMB (Server Message Block) is a protocol for sharing files, printers 71 * and other abstractions over a computer network. SMB supports compression 72 * to speed up transfers. The following SMB compression algorithms are 73 * supported: 74 * 75 * ---------------|---------|---------|-------|--------------------------- 76 * Algorithm | Encoder | Decoder | Ratio | Encoder / decoder memory 77 * ---------------|---------|---------|-------|--------------------------- 78 * LZ77 | fastest | fastest | 2.3x | 66 KB / 0 KB 79 * LZ77+Huffman | slowest | slowest | 2.8x | 172 KB / 6 KB 80 * LZNT1 | fast | fastest | 2.0x | 33 KB / 0 KB 81 * ---------------|---------|---------|-------|--------------------------- 82 */ 83 84 typedef enum{ 85 COMPRESSION_LZ4 = 0x100, // LZ4 + simple frame format (buffer + stream API) 86 COMPRESSION_LZ4_RAW = 0x101, // LZ4 (buffer API only) 87 COMPRESSION_SMB_LZNT1 = 0xC00, // SMB LZNT1 (buffer API only) 88 COMPRESSION_SMB_LZ77 = 0xC10, // SMB LZ77 (buffer API only) 89 COMPRESSION_SMB_LZ77H = 0xC20, // SMB LZ77-HUFF (buffer API only) 90 } compression_algorithm_t; 91 92 // ================================================================================================================= 93 #pragma mark - Buffer API 94 95 /*! 96 * @abstract Get the minimum scratch buffer size for the specified compression algorithm encoder. 97 * @param algorithm The compression algorithm for which the scratch space will be used. 98 * @return The number of bytes to allocate as a scratch buffer for use to encode with the specified 99 * compression algorithm. This number may be 0. 100 */ 101 typedef size_t (*compression_encode_scratch_buffer_size_proc) 102 (compression_algorithm_t algorithm); 103 104 /*! 105 * @abstract Compresses a buffer. 106 * @param dst_buffer Pointer to the first byte of the destination buffer. 107 * @param dst_size Size of the destination buffer in bytes. 108 * @param src_buffer Pointer to the first byte of the source buffer. 109 * @param src_size Size of the source buffer in bytes. 110 * @param scratch_buffer A pointer to scratch space that the routine can use for temporary 111 * storage during compression. To determine how much space to allocate for this 112 * scratch space, call compression_encode_scratch_buffer_size(algorithm). Scratch space 113 * may be re-used across multiple (serial) calls to _encode and _decode. 114 * Can be NULL, if an algorithm does not need any scratch space. 115 * @param algorithm The compression algorithm to be used. 116 * @return The number of bytes written to the destination buffer if the input is 117 * is successfully compressed. If the entire input cannot be compressed to fit 118 * into the provided destination buffer, or an error occurs, 0 is returned. 119 */ 120 typedef size_t (*compression_encode_buffer_proc) 121 (uint8_t* dst_buffer, size_t dst_size, 122 const uint8_t* src_buffer, size_t src_size, 123 void* scratch_buffer, compression_algorithm_t algorithm); 124 125 /*! 126 * @abstract Get the minimum scratch buffer size for the specified compression algorithm decoder. 127 * @param algorithm The compression algorithm for which the scratch space will be used. 128 * @return The number of bytes to allocate as a scratch buffer for use to decode with the specified 129 * compression algorithm. This number may be 0. 130 */ 131 typedef size_t (*compression_decode_scratch_buffer_size_proc) 132 (compression_algorithm_t algorithm); 133 134 /*! 135 * @abstract Decompresses a buffer. 136 * @param dst_buffer Pointer to the first byte of the destination buffer. 137 * @param dst_size Size of the destination buffer in bytes. 138 * @param src_buffer Pointer to the first byte of the source buffer. 139 * @param src_size Size of the source buffer in bytes. 140 * @param scratch_buffer A pointer to scratch space that the routine can use for temporary 141 * storage during decompression. To determine how much space to allocate for this 142 * scratch space, call compression_decode_scratch_buffer_size(algorithm). Scratch space 143 * may be re-used across multiple (serial) calls to _encode and _decode. 144 * Can be NULL, if an algorithm does not need any scratch space. 145 * @param algorithm The compression algorithm to be used. 146 * @return The number of bytes written to the destination buffer if the input is 147 * is successfully decompressed. If there is not enough space in the destination 148 * buffer to hold the entire expanded output, only the first dst_size bytes will 149 * be written to the buffer and dst_size is returned. Note that this behavior 150 * differs from that of compression_encode. If an error occurs, 0 is returned. 151 * SMB algorithms do not support truncated decodes. 152 * SMB algorithms expect src_size to be exactly the size of the compressed input. 153 */ 154 typedef size_t (*compression_decode_buffer_proc) 155 (uint8_t* dst_buffer, size_t dst_size, 156 const uint8_t* src_buffer, size_t src_size, 157 void* scratch_buffer, compression_algorithm_t algorithm); 158 159 // ================================================================================================================= 160 #pragma mark - Stream API 161 162 /* Return values for the compression_stream functions. */ 163 typedef enum{ 164 COMPRESSION_STATUS_OK = 0, 165 COMPRESSION_STATUS_ERROR = -1, 166 COMPRESSION_STATUS_END = 1, 167 } compression_status_t; 168 169 typedef enum{ 170 COMPRESSION_STREAM_ENCODE = 0, /* Encode to a compressed stream */ 171 COMPRESSION_STREAM_DECODE = 1, /* Decode from a compressed stream */ 172 } compression_stream_operation_t; 173 174 /* Bits for the flags in compression_stream_process. */ 175 typedef enum{ 176 COMPRESSION_STREAM_FINALIZE = 0x0001, 177 } compression_stream_flags_t; 178 179 typedef struct{ 180 /* 181 * You are partially responsible for management of the dst_ptr, 182 * dst_size, src_ptr, and src_size fields. You must initialize 183 * them to describe valid memory buffers before making a call to 184 * compression_stream_process. compression_stream_process will update 185 * these fields before returning to account for the bytes of the src 186 * and dst buffers that were successfully processed. 187 */ 188 uint8_t* dst_ptr; 189 size_t dst_size; 190 const uint8_t* src_ptr; 191 size_t src_size; 192 193 /* The stream state object is managed by the compression_stream functions. 194 * You should not ever directly access this field. */ 195 void* state; 196 } compression_stream_t; 197 198 /* There are two critical features of the stream interfaces: 199 * 200 * - They allow encoding and decoding to be resumed from where it ended 201 * when the end of a source or destination block was reached. 202 * 203 * - When resuming, the new source and destination blocks need not be 204 * contiguous with earlier blocks in the stream; all necessary state 205 * to resume compression is represented by the compression_stream_t object. 206 * 207 * These two properties enable tasks like: 208 * 209 * - Decoding a compressed stream into a buffer with the ability to grow 210 * the buffer and resume decoding if the expanded stream is too large 211 * to fit without repeating any work. 212 * 213 * - Encoding a stream as pieces of it become available without ever needing 214 * to create an allocation large enough to hold all the uncompressed data. 215 * 216 * The basic workflow for using the stream interface is as follows: 217 * 218 * 1. initialize the state of your compression_stream object by calling 219 * compression_stream_init with the operation parameter set to specify 220 * whether you will be encoding or decoding, and the chosen algorithm 221 * specified by the algorithm parameter. This will allocate storage 222 * for the state that allows encoding or decoding to be resumed 223 * across calls. 224 * 225 * 2. set the dst_buffer, dst_size, src_buffer, and src_size fields of 226 * the compression_stream object to point to the next blocks to be 227 * processed. 228 * 229 * 3. call compression_stream_process. If no further input will be added 230 * to the stream via subsequent calls, finalize should be non-zero. 231 * If compression_stream_process returns COMPRESSION_STATUS_END, there 232 * will be no further output from the stream. 233 * 234 * 4. repeat steps 2 and 3 as necessary to process the entire stream. 235 * 236 * 5. call compression_stream_destroy to free the state object in the 237 * compression_stream. 238 */ 239 240 /*! 241 * @abstract Initialize a compression_stream for 242 * encoding (if operation is COMPRESSION_STREAM_ENCODE) or 243 * decoding (if operation is COMPRESSION_STREAM_DECODE). 244 * @param stream Pointer to the compression_stream object to be initialized. 245 * @param operation Specifies whether the stream is to initialized for encoding or decoding. 246 * Must be either COMPRESSION_STREAM_ENCODE or COMPRESSION_STREAM_DECODE. 247 * @param algorithm The compression algorithm to be used. Must be one of the values specified 248 * in the compression_algorithm_t enum. 249 * @discussion This call initializes all fields of the compression_stream to zero, except for state; 250 * this routine allocates storage to capture the internal state of the encoding or decoding 251 * process so that it may be resumed. This storage is tracked via the state parameter. 252 * @return COMPRESSION_STATUS_OK if the stream was successfully initialized, or 253 * COMPRESSION_STATUS_ERROR if an error occurred. 254 */ 255 typedef compression_status_t (*compression_stream_init_proc) 256 (compression_stream_t* stream, 257 compression_stream_operation_t operation, 258 compression_algorithm_t algorithm); 259 260 /*! 261 * @abstract Functionally equivalent to compression_stream_destroy then compression_stream_init, but keeps the allocated state buffer. 262 * @return Status of the virtual compression_stream_init call 263 */ 264 typedef compression_status_t (*compression_stream_reinit_proc) 265 (compression_stream_t* stream, 266 compression_stream_operation_t operation, 267 compression_algorithm_t algorithm); 268 269 /*! 270 * @abstract Cleans up state information stored in a compression_stream object. 271 * @discussion Use this to free memory allocated by compression_stream_init. After calling 272 * this function, you will need to re-init the compression_stream object before 273 * using it again. 274 */ 275 typedef compression_status_t (*compression_stream_destroy_proc) 276 (compression_stream_t* stream); 277 278 /*! 279 * @abstract Encodes or decodes a block of the stream. 280 * @param stream Pointer to the compression_stream object to be operated on. Before calling 281 * this function, you must initialize the stream object by calling 282 * compression_stream_init, and setting the user-managed fields to describe your 283 * input and output buffers. When compression_stream_process returns, those 284 * fields will have been updated to account for the bytes that were successfully 285 * encoded or decoded in the course of its operation. 286 * @param flags Binary OR of zero or more compression_stream_flags: 287 * COMPRESSION_STREAM_FINALIZE 288 * If set, indicates that no further input will be added to the stream, and 289 * thus that the end of stream should be indicated if the input block is 290 * completely processed. 291 * @discussion Processes the buffers described by the stream object until the source buffer 292 * becomes empty, or the destination buffer becomes full, or the entire stream is 293 * processed, or an error is encountered. 294 * @return When encoding COMPRESSION_STATUS_END is returned only if all input has been 295 * read from the source, all output (including an end-of-stream marker) has been 296 * written to the destination, and COMPRESSION_STREAM_FINALIZE bit is set. 297 * 298 * When decoding COMPRESSION_STATUS_END is returned only if all input (including 299 * and end-of-stream marker) has been read from the source, and all output has 300 * been written to the destination. 301 * 302 * COMPRESSION_STATUS_OK is returned if all data in the source buffer is consumed, 303 * or all space in the destination buffer is used. In that case, further calls 304 * to compression_stream_process are expected, providing more data in the source 305 * buffer, or more space in the destination buffer. 306 * 307 * COMPRESSION_STATUS_ERROR is returned if an error is encountered (if the 308 * encoded data is corrupted, for example). 309 * 310 * When decoding a valid stream, the end of stream will be detected from the contents 311 * of the input, and COMPRESSION_STATUS_END will be returned in that case, even if 312 * COMPRESSION_STREAM_FINALIZE is not set, or more input is provided. 313 * 314 * When decoding a corrupted or truncated stream, if COMPRESSION_STREAM_FINALIZE is not 315 * set to notify the decoder that no more input is coming, the decoder will not consume 316 * or produce any data, and return COMPRESSION_STATUS_OK. In that case, the client code 317 * will call compression_stream_process again with the same state, entering an infinite loop. 318 * To avoid this, it is strongly advised to always set COMPRESSION_STREAM_FINALIZE when 319 * no more input is expected, for both encoding and decoding. 320 */ 321 typedef compression_status_t (*compression_stream_process_proc) 322 (compression_stream_t* stream, int flags); 323 324 /*! 325 * @abstract Identify the compression algorithm for the first 4 bytes of compressed data. 326 * @param data Points to 4 bytes at the beginning of the compressed data. 327 * @discussion This call identifies the compression algorithm used to generate the given data bytes. 328 * @return A valid compression_algorithm_t on success, or -1 if the data bytes do not correspond to any supported algorithm. 329 */ 330 typedef int (*compression_stream_identify_algorithm_proc) 331 (const uint8_t* data); 332 333 // ================================================================================================================= 334 #pragma mark - Kernel interface 335 336 typedef struct{ 337 // Stream API 338 compression_stream_init_proc compression_stream_init; 339 compression_stream_reinit_proc compression_stream_reinit; 340 compression_stream_destroy_proc compression_stream_destroy; 341 compression_stream_process_proc compression_stream_process; 342 compression_stream_identify_algorithm_proc compression_stream_identify_algorithm; 343 344 // Buffer API 345 compression_encode_scratch_buffer_size_proc compression_encode_scratch_buffer_size; 346 compression_encode_buffer_proc compression_encode_buffer; 347 compression_decode_scratch_buffer_size_proc compression_decode_scratch_buffer_size; 348 compression_decode_buffer_proc compression_decode_buffer; 349 } compression_ki_t; 350 351 __BEGIN_DECLS 352 353 /** 354 * @abstract The compression interface that was registered. 355 */ 356 extern const compression_ki_t * compression_ki_ptr; 357 358 /** 359 * @abstract Registers the compression kext interface for use within the kernel proper. 360 * @param ki The interface to register. 361 * @discussion This routine may only be called once and must be called before late-const has been applied to kernel memory. 362 */ 363 OS_EXPORT OS_NONNULL1 364 void compression_interface_register(const compression_ki_t *ki); 365 366 #if PRIVATE 367 368 typedef void (*registration_callback_t)(void); 369 370 void compression_interface_set_registration_callback(registration_callback_t callback); 371 372 #endif /* PRIVATE */ 373 374 __END_DECLS 375 376 #endif // __COMPRESSION_H 377