xref: /xnu-11215.81.4/libkern/libkern/compression/compression.h (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1 /*
2  * Copyright (c) 2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #ifndef __COMPRESSION_H
30 #define __COMPRESSION_H
31 
32 #include <stdint.h>
33 #include <stddef.h>
34 #include <os/base.h>
35 
36 /*!
37  *  @enum       compression_algorithm_t
38  *  @abstract   Tag used to select a compression algorithm.
39  *  @discussion Further details on the supported formats, and their implementation:
40  *
41  *              - LZ4 is an extremely high-performance compressor.  The open source version
42  *              is already one of the fastest compressors of which we are aware, and we
43  *              have optimized it still further in our implementation.  The encoded format
44  *              we produce and consume is compatible with the open source version, except
45  *              that we add a very simple frame to the raw stream to allow some additional
46  *              validation and functionality.
47  *
48  *              The frame is documented here so that you can easily wrap another LZ4
49  *              encoder/decoder to produce/consume the same data stream if necessary.  An
50  *              LZ4 encoded buffer is a sequence of blocks, each of which begins with a
51  *              header.  There are three possible headers:
52  *
53  *                   a "compressed block header" is (hex) 62 76 34 31, followed by the
54  *                   size in bytes of the decoded (plaintext) data represented by the
55  *                   block and the size (in bytes) of the encoded data stored in the
56  *                   block.  Both size fields are stored as (possibly unaligned) 32-bit
57  *                   little-endian values.  The compressed block header is followed
58  *                   immediately by the actual lz4-encoded data stream.
59  *
60  *                   an "uncompressed block header" is (hex) 62 76 34 2d, followed by the
61  *                   size of the data stored in the uncompressed block as a (possibly
62  *                   unaligned) 32-bit little-endian value.  The uncompressed block header
63  *                   is followed immediately by the uncompressed data buffer of the
64  *                   specified size.
65  *
66  *                   an "end of stream header" is (hex) 62 76 34 24, and marks the end
67  *                   of the lz4 frame.  No further data may be written or read beyond
68  *                   this header.
69  *
70  *				- SMB (Server Message Block) is a protocol for sharing files, printers
71  *              and other abstractions over a computer network. SMB supports compression
72  *              to speed up transfers. The following SMB compression algorithms are
73  *              supported:
74  *
75  *              ---------------|---------|---------|-------|---------------------------
76  *                Algorithm    | Encoder | Decoder | Ratio | Encoder / decoder memory
77  *              ---------------|---------|---------|-------|---------------------------
78  *                LZ77         | fastest | fastest | 2.3x  |   66 KB / 0 KB
79  *                LZ77+Huffman | slowest | slowest | 2.8x  |  172 KB / 6 KB
80  *                LZNT1        | fast    | fastest | 2.0x  |   33 KB / 0 KB
81  *              ---------------|---------|---------|-------|---------------------------
82  */
83 
84 typedef enum{
85 	COMPRESSION_LZ4       = 0x100, // LZ4 + simple frame format (buffer + stream API)
86 	COMPRESSION_LZ4_RAW   = 0x101, // LZ4 (buffer API only)
87 	COMPRESSION_SMB_LZNT1 = 0xC00, // SMB LZNT1 (buffer API only)
88 	COMPRESSION_SMB_LZ77  = 0xC10, // SMB LZ77 (buffer API only)
89 	COMPRESSION_SMB_LZ77H = 0xC20, // SMB LZ77-HUFF (buffer API only)
90 } compression_algorithm_t;
91 
92 // =================================================================================================================
93 #pragma mark - Buffer API
94 
95 /*!
96  *  @abstract        Get the minimum scratch buffer size for the specified compression algorithm encoder.
97  *  @param algorithm The compression algorithm for which the scratch space will be used.
98  *  @return          The number of bytes to allocate as a scratch buffer for use to encode with the specified
99  *                   compression algorithm. This number may be 0.
100  */
101 typedef size_t (*compression_encode_scratch_buffer_size_proc)
102 (compression_algorithm_t algorithm);
103 
104 /*!
105  *  @abstract             Compresses a buffer.
106  *  @param dst_buffer     Pointer to the first byte of the destination buffer.
107  *  @param dst_size       Size of the destination buffer in bytes.
108  *  @param src_buffer     Pointer to the first byte of the source buffer.
109  *  @param src_size       Size of the source buffer in bytes.
110  *  @param scratch_buffer A pointer to scratch space that the routine can use for temporary
111  *                        storage during compression.  To determine how much space to allocate for this
112  *                        scratch space, call compression_encode_scratch_buffer_size(algorithm).  Scratch space
113  *                        may be re-used across multiple (serial) calls to _encode and _decode.
114  *                        Can be NULL, if an algorithm does not need any scratch space.
115  *  @param algorithm      The compression algorithm to be used.
116  *  @return               The number of bytes written to the destination buffer if the input is
117  *                        is successfully compressed.  If the entire input cannot be compressed to fit
118  *                        into the provided destination buffer, or an error occurs, 0 is returned.
119  */
120 typedef size_t (*compression_encode_buffer_proc)
121 (uint8_t* dst_buffer, size_t dst_size,
122     const uint8_t* src_buffer, size_t src_size,
123     void* scratch_buffer, compression_algorithm_t algorithm);
124 
125 /*!
126  *  @abstract        Get the minimum scratch buffer size for the specified compression algorithm decoder.
127  *  @param algorithm The compression algorithm for which the scratch space will be used.
128  *  @return          The number of bytes to allocate as a scratch buffer for use to decode with the specified
129  *                   compression algorithm. This number may be 0.
130  */
131 typedef size_t (*compression_decode_scratch_buffer_size_proc)
132 (compression_algorithm_t algorithm);
133 
134 /*!
135  *  @abstract             Decompresses a buffer.
136  *  @param dst_buffer     Pointer to the first byte of the destination buffer.
137  *  @param dst_size       Size of the destination buffer in bytes.
138  *  @param src_buffer     Pointer to the first byte of the source buffer.
139  *  @param src_size       Size of the source buffer in bytes.
140  *  @param scratch_buffer A pointer to scratch space that the routine can use for temporary
141  *                        storage during decompression.  To determine how much space to allocate for this
142  *                        scratch space, call compression_decode_scratch_buffer_size(algorithm).  Scratch space
143  *                        may be re-used across multiple (serial) calls to _encode and _decode.
144  *                        Can be NULL, if an algorithm does not need any scratch space.
145  *  @param algorithm      The compression algorithm to be used.
146  *  @return               The number of bytes written to the destination buffer if the input is
147  *                        is successfully decompressed.  If there is not enough space in the destination
148  *                        buffer to hold the entire expanded output, only the first dst_size bytes will
149  *                        be written to the buffer and dst_size is returned.   Note that this behavior
150  *                        differs from that of compression_encode.  If an error occurs, 0 is returned.
151  *                        SMB algorithms do not support truncated decodes.
152  *                        SMB algorithms expect src_size to be exactly the size of the compressed input.
153  */
154 typedef size_t (*compression_decode_buffer_proc)
155 (uint8_t* dst_buffer, size_t dst_size,
156     const uint8_t* src_buffer, size_t src_size,
157     void* scratch_buffer, compression_algorithm_t algorithm);
158 
159 // =================================================================================================================
160 #pragma mark - Stream API
161 
162 /* Return values for the compression_stream functions. */
163 typedef enum{
164 	COMPRESSION_STATUS_OK    =  0,
165 	COMPRESSION_STATUS_ERROR = -1,
166 	COMPRESSION_STATUS_END   =  1,
167 } compression_status_t;
168 
169 typedef enum{
170 	COMPRESSION_STREAM_ENCODE = 0, /* Encode to a compressed stream */
171 	COMPRESSION_STREAM_DECODE = 1, /* Decode from a compressed stream */
172 } compression_stream_operation_t;
173 
174 /* Bits for the flags in compression_stream_process. */
175 typedef enum{
176 	COMPRESSION_STREAM_FINALIZE = 0x0001,
177 } compression_stream_flags_t;
178 
179 typedef struct{
180 	/*
181 	 *  You are partially responsible for management of the dst_ptr,
182 	 *  dst_size, src_ptr, and src_size fields.  You must initialize
183 	 *  them to describe valid memory buffers before making a call to
184 	 *  compression_stream_process. compression_stream_process will update
185 	 *  these fields before returning to account for the bytes of the src
186 	 *  and dst buffers that were successfully processed.
187 	 */
188 	uint8_t*       dst_ptr;
189 	size_t         dst_size;
190 	const uint8_t* src_ptr;
191 	size_t         src_size;
192 
193 	/* The stream state object is managed by the compression_stream functions.
194 	 *  You should not ever directly access this field. */
195 	void*          state;
196 } compression_stream_t;
197 
198 /*  There are two critical features of the stream interfaces:
199  *
200  *     - They allow encoding and decoding to be resumed from where it ended
201  *       when the end of a source or destination block was reached.
202  *
203  *     - When resuming, the new source and destination blocks need not be
204  *       contiguous with earlier blocks in the stream; all necessary state
205  *       to resume compression is represented by the compression_stream_t object.
206  *
207  *   These two properties enable tasks like:
208  *
209  *     - Decoding a compressed stream into a buffer with the ability to grow
210  *       the buffer and resume decoding if the expanded stream is too large
211  *       to fit without repeating any work.
212  *
213  *     - Encoding a stream as pieces of it become available without ever needing
214  *       to create an allocation large enough to hold all the uncompressed data.
215  *
216  *   The basic workflow for using the stream interface is as follows:
217  *
218  *       1. initialize the state of your compression_stream object by calling
219  *       compression_stream_init with the operation parameter set to specify
220  *       whether you will be encoding or decoding, and the chosen algorithm
221  *       specified by the algorithm parameter. This will allocate storage
222  *       for the state that allows encoding or decoding to be resumed
223  *       across calls.
224  *
225  *       2. set the dst_buffer, dst_size, src_buffer, and src_size fields of
226  *       the compression_stream object to point to the next blocks to be
227  *       processed.
228  *
229  *       3. call compression_stream_process. If no further input will be added
230  *       to the stream via subsequent calls, finalize should be non-zero.
231  *       If compression_stream_process returns COMPRESSION_STATUS_END, there
232  *       will be no further output from the stream.
233  *
234  *       4. repeat steps 2 and 3 as necessary to process the entire stream.
235  *
236  *       5. call compression_stream_destroy to free the state object in the
237  *       compression_stream.
238  */
239 
240 /*!
241  *  @abstract         Initialize a compression_stream for
242  *                    encoding (if operation is COMPRESSION_STREAM_ENCODE) or
243  *                    decoding (if operation is COMPRESSION_STREAM_DECODE).
244  *  @param stream     Pointer to the compression_stream object to be initialized.
245  *  @param operation  Specifies whether the stream is to initialized for encoding or decoding.
246  *                    Must be either COMPRESSION_STREAM_ENCODE or COMPRESSION_STREAM_DECODE.
247  *  @param algorithm  The compression algorithm to be used.  Must be one of the values specified
248  *                    in the compression_algorithm_t enum.
249  *  @discussion       This call initializes all fields of the compression_stream to zero, except for state;
250  *                    this routine allocates storage to capture the internal state of the encoding or decoding
251  *                    process so that it may be resumed. This storage is tracked via the state parameter.
252  *  @return           COMPRESSION_STATUS_OK if the stream was successfully initialized, or
253  *                    COMPRESSION_STATUS_ERROR if an error occurred.
254  */
255 typedef compression_status_t (*compression_stream_init_proc)
256 (compression_stream_t* stream,
257     compression_stream_operation_t operation,
258     compression_algorithm_t algorithm);
259 
260 /*!
261  *  @abstract Functionally equivalent to compression_stream_destroy then compression_stream_init, but keeps the allocated state buffer.
262  *  @return   Status of the virtual compression_stream_init call
263  */
264 typedef compression_status_t (*compression_stream_reinit_proc)
265 (compression_stream_t* stream,
266     compression_stream_operation_t operation,
267     compression_algorithm_t algorithm);
268 
269 /*!
270  *  @abstract   Cleans up state information stored in a compression_stream object.
271  *  @discussion Use this to free memory allocated by compression_stream_init.  After calling
272  *              this function, you will need to re-init the compression_stream object before
273  *              using it again.
274  */
275 typedef compression_status_t (*compression_stream_destroy_proc)
276 (compression_stream_t* stream);
277 
278 /*!
279  *  @abstract     Encodes or decodes a block of the stream.
280  *  @param stream Pointer to the compression_stream object to be operated on.  Before calling
281  *                this function, you must initialize the stream object by calling
282  *                compression_stream_init, and setting the user-managed fields to describe your
283  *                input and output buffers. When compression_stream_process returns, those
284  *                fields will have been updated to account for the bytes that were successfully
285  *                encoded or decoded in the course of its operation.
286  *  @param flags  Binary OR of zero or more compression_stream_flags:
287  *                COMPRESSION_STREAM_FINALIZE
288  *                  If set, indicates that no further input will be added to the stream, and
289  *                  thus that the end of stream should be indicated if the input block is
290  *                  completely processed.
291  *  @discussion   Processes the buffers described by the stream object until the source buffer
292  *                becomes empty, or the destination buffer becomes full, or the entire stream is
293  *                processed, or an error is encountered.
294  *  @return       When encoding COMPRESSION_STATUS_END is returned only if all input has been
295  *                read from the source, all output (including an end-of-stream marker) has been
296  *                written to the destination, and COMPRESSION_STREAM_FINALIZE bit is set.
297  *
298  *                When decoding COMPRESSION_STATUS_END is returned only if all input (including
299  *                and end-of-stream marker) has been read from the source, and all output has
300  *                been written to the destination.
301  *
302  *                COMPRESSION_STATUS_OK is returned if all data in the source buffer is consumed,
303  *                or all space in the destination buffer is used. In that case, further calls
304  *                to compression_stream_process are expected, providing more data in the source
305  *                buffer, or more space in the destination buffer.
306  *
307  *                COMPRESSION_STATUS_ERROR is returned if an error is encountered (if the
308  *                encoded data is corrupted, for example).
309  *
310  *                When decoding a valid stream, the end of stream will be detected from the contents
311  *                of the input, and COMPRESSION_STATUS_END will be returned in that case, even if
312  *                COMPRESSION_STREAM_FINALIZE is not set, or more input is provided.
313  *
314  *                When decoding a corrupted or truncated stream, if COMPRESSION_STREAM_FINALIZE is not
315  *                set to notify the decoder that no more input is coming, the decoder will not consume
316  *                or produce any data, and return COMPRESSION_STATUS_OK.  In that case, the client code
317  *                will call compression_stream_process again with the same state, entering an infinite loop.
318  *                To avoid this, it is strongly advised to always set COMPRESSION_STREAM_FINALIZE when
319  *                no more input is expected, for both encoding and decoding.
320  */
321 typedef compression_status_t (*compression_stream_process_proc)
322 (compression_stream_t* stream, int flags);
323 
324 /*!
325  *  @abstract   Identify the compression algorithm for the first 4 bytes of compressed data.
326  *  @param data Points to 4 bytes at the beginning of the compressed data.
327  *  @discussion This call identifies the compression algorithm used to generate the given data bytes.
328  *  @return     A valid compression_algorithm_t on success, or -1 if the data bytes do not correspond to any supported algorithm.
329  */
330 typedef int (*compression_stream_identify_algorithm_proc)
331 (const uint8_t* data);
332 
333 // =================================================================================================================
334 #pragma mark - Kernel interface
335 
336 typedef struct{
337 	// Stream API
338 	compression_stream_init_proc                compression_stream_init;
339 	compression_stream_reinit_proc              compression_stream_reinit;
340 	compression_stream_destroy_proc             compression_stream_destroy;
341 	compression_stream_process_proc             compression_stream_process;
342 	compression_stream_identify_algorithm_proc  compression_stream_identify_algorithm;
343 
344 	// Buffer API
345 	compression_encode_scratch_buffer_size_proc compression_encode_scratch_buffer_size;
346 	compression_encode_buffer_proc              compression_encode_buffer;
347 	compression_decode_scratch_buffer_size_proc compression_decode_scratch_buffer_size;
348 	compression_decode_buffer_proc              compression_decode_buffer;
349 } compression_ki_t;
350 
351 __BEGIN_DECLS
352 
353 /**
354  * @abstract The compression interface that was registered.
355  */
356 extern const compression_ki_t * compression_ki_ptr;
357 
358 /**
359  * @abstract   Registers the compression kext interface for use within the kernel proper.
360  * @param ki   The interface to register.
361  * @discussion This routine may only be called once and must be called before late-const has been applied to kernel memory.
362  */
363 OS_EXPORT OS_NONNULL1
364 void compression_interface_register(const compression_ki_t *ki);
365 
366 #if PRIVATE
367 
368 typedef void (*registration_callback_t)(void);
369 
370 void compression_interface_set_registration_callback(registration_callback_t callback);
371 
372 #endif /* PRIVATE */
373 
374 __END_DECLS
375 
376 #endif // __COMPRESSION_H
377