xref: /xnu-8796.101.5/libkern/libkern/compression/compression.h (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #ifndef __COMPRESSION_H
30 #define __COMPRESSION_H
31 
32 #include <stdint.h>
33 #include <stddef.h>
34 #include <os/base.h>
35 
36 /*!
37  *  @enum       compression_algorithm_t
38  *  @abstract   Tag used to select a compression algorithm.
39  *  @discussion Further details on the supported formats, and their implementation:
40  *
41  *              - LZ4 is an extremely high-performance compressor.  The open source version
42  *              is already one of the fastest compressors of which we are aware, and we
43  *              have optimized it still further in our implementation.  The encoded format
44  *              we produce and consume is compatible with the open source version, except
45  *              that we add a very simple frame to the raw stream to allow some additional
46  *              validation and functionality.
47  *
48  *              The frame is documented here so that you can easily wrap another LZ4
49  *              encoder/decoder to produce/consume the same data stream if necessary.  An
50  *              LZ4 encoded buffer is a sequence of blocks, each of which begins with a
51  *              header.  There are three possible headers:
52  *
53  *                   a "compressed block header" is (hex) 62 76 34 31, followed by the
54  *                   size in bytes of the decoded (plaintext) data represented by the
55  *                   block and the size (in bytes) of the encoded data stored in the
56  *                   block.  Both size fields are stored as (possibly unaligned) 32-bit
57  *                   little-endian values.  The compressed block header is followed
58  *                   immediately by the actual lz4-encoded data stream.
59  *
60  *                   an "uncompressed block header" is (hex) 62 76 34 2d, followed by the
61  *                   size of the data stored in the uncompressed block as a (possibly
62  *                   unaligned) 32-bit little-endian value.  The uncompressed block header
63  *                   is followed immediately by the uncompressed data buffer of the
64  *                   specified size.
65  *
66  *                   an "end of stream header" is (hex) 62 76 34 24, and marks the end
67  *                   of the lz4 frame.  No further data may be written or read beyond
68  *                   this header.
69  */
70 typedef enum{
71 	COMPRESSION_LZ4 = 0x100, // LZ4 raw stream wrapped in the simple block-header frame documented above
72 } compression_algorithm_t;
73 
74 /* Return values for the compression_stream functions. */
75 typedef enum{
76 	COMPRESSION_STATUS_OK    =  0, /* Success; from compression_stream_process, further calls are expected */
77 	COMPRESSION_STATUS_ERROR = -1, /* An error occurred (for example, the encoded data is corrupted) */
78 	COMPRESSION_STATUS_END   =  1, /* End of stream: there will be no further output from the stream */
79 } compression_status_t;
80 
81 typedef enum{ /* Direction of a stream, passed as the operation argument when initializing it */
82 	COMPRESSION_STREAM_ENCODE = 0, /* Encode to a compressed stream */
83 	COMPRESSION_STREAM_DECODE = 1, /* Decode from a compressed stream */
84 } compression_stream_operation_t;
85 
86 /* Bits for the flags argument of compression_stream_process; combine with binary OR. */
87 typedef enum{
88 	COMPRESSION_STREAM_FINALIZE = 0x0001, /* No further input will be added to the stream */
89 } compression_stream_flags_t;
90 
91 typedef struct{
92 	/*
93 	 *  You are partially responsible for management of the dst_ptr,
94 	 *  dst_size, src_ptr, and src_size fields.  You must initialize
95 	 *  them to describe valid memory buffers before making a call to
96 	 *  compression_stream_process.  compression_stream_process will update
97 	 *  these fields before returning to account for the bytes of the src
98 	 *  and dst buffers that were successfully processed.
99 	 */
100 	uint8_t*       dst_ptr;  /* Destination buffer that receives processed output */
101 	size_t         dst_size; /* Size of the destination buffer */
102 	const uint8_t* src_ptr;  /* Source buffer holding the input to process */
103 	size_t         src_size; /* Size of the source buffer */
104 
105 	/* The stream state object is managed by the compression_stream functions.
106 	 *  You should never access this field directly. */
107 	void*          state;    /* Allocated by compression_stream_init, freed by compression_stream_destroy */
108 } compression_stream_t;
109 
110 /*  There are two critical features of the stream interfaces:
111  *
112  *     - They allow encoding and decoding to be resumed from where it ended
113  *       when the end of a source or destination block was reached.
114  *
115  *     - When resuming, the new source and destination blocks need not be
116  *       contiguous with earlier blocks in the stream; all necessary state
117  *       to resume compression is represented by the compression_stream_t object.
118  *
119  *   These two properties enable tasks like:
120  *
121  *     - Decoding a compressed stream into a buffer with the ability to grow
122  *       the buffer and resume decoding if the expanded stream is too large
123  *       to fit without repeating any work.
124  *
125  *     - Encoding a stream as pieces of it become available without ever needing
126  *       to create an allocation large enough to hold all the uncompressed data.
127  *
128  *   The basic workflow for using the stream interface is as follows:
129  *
130  *       1. initialize the state of your compression_stream object by calling
131  *       compression_stream_init with the operation parameter set to specify
132  *       whether you will be encoding or decoding, and the chosen algorithm
133  *       specified by the algorithm parameter. This will allocate storage
134  *       for the state that allows encoding or decoding to be resumed
135  *       across calls.
136  *
137  *       2. set the dst_ptr, dst_size, src_ptr, and src_size fields of
138  *       the compression_stream object to point to the next blocks to be
139  *       processed.
140  *
141  *       3. call compression_stream_process. If no further input will be added
142  *       to the stream via subsequent calls, set COMPRESSION_STREAM_FINALIZE in flags.
143  *       If compression_stream_process returns COMPRESSION_STATUS_END, there
144  *       will be no further output from the stream.
145  *
146  *       4. repeat steps 2 and 3 as necessary to process the entire stream.
147  *
148  *       5. call compression_stream_destroy to free the state object in the
149  *       compression_stream.
150  */
151 
152 /*!
153  *  @abstract         Initialize a compression_stream for
154  *                    encoding (if operation is COMPRESSION_STREAM_ENCODE) or
155  *                    decoding (if operation is COMPRESSION_STREAM_DECODE).
156  *  @param stream     Pointer to the compression_stream object to be initialized.
157  *  @param operation  Specifies whether the stream is to be initialized for encoding or decoding.
158  *                    Must be either COMPRESSION_STREAM_ENCODE or COMPRESSION_STREAM_DECODE.
159  *  @param algorithm  The compression algorithm to be used.  Must be one of the values specified
160  *                    in the compression_algorithm_t enum.
161  *  @discussion       This call initializes all fields of the compression_stream to zero, except for state;
162  *                    this routine allocates storage to capture the internal state of the encoding or decoding
163  *                    process so that it may be resumed. This storage is tracked via the state field.
164  *  @return           COMPRESSION_STATUS_OK if the stream was successfully initialized, or
165  *                    COMPRESSION_STATUS_ERROR if an error occurred.
166  */
167 typedef compression_status_t (*compression_stream_init_proc)
168 (compression_stream_t* stream,
169     compression_stream_operation_t operation,
170     compression_algorithm_t algorithm);
171 
172 /*!
173  *  @abstract Functionally equivalent to compression_stream_destroy followed by compression_stream_init, but keeps the allocated state buffer.
174  *  @return   Status of the virtual (implied) compression_stream_init step; see compression_stream_init_proc for the possible values.
175  */
176 typedef compression_status_t (*compression_stream_reinit_proc)
177 (compression_stream_t* stream,
178     compression_stream_operation_t operation,
179     compression_algorithm_t algorithm);
180 
181 /*!
182  *  @abstract   Cleans up state information stored in a compression_stream object.
183  *  @discussion Use this to free memory allocated by compression_stream_init.  After calling
184  *              this function, you will need to re-initialize the compression_stream object before
185  *              using it again.
186  */
187 typedef compression_status_t (*compression_stream_destroy_proc)
188 (compression_stream_t* stream); /* NOTE(review): the meaning of the returned status is not documented here - confirm against the implementation */
189 
190 /*!
191  *  @abstract     Encodes or decodes a block of the stream.
192  *  @param stream Pointer to the compression_stream object to be operated on.  Before calling
193  *                this function, you must initialize the stream object by calling
194  *                compression_stream_init, and setting the user-managed fields to describe your
195  *                input and output buffers. When compression_stream_process returns, those
196  *                fields will have been updated to account for the bytes that were successfully
197  *                encoded or decoded in the course of its operation.
198  *  @param flags  Binary OR of zero or more compression_stream_flags_t bits:
199  *                COMPRESSION_STREAM_FINALIZE
200  *                  If set, indicates that no further input will be added to the stream, and
201  *                  thus that the end of stream should be indicated if the input block is
202  *                  completely processed.
203  *  @discussion   Processes the buffers described by the stream object until the source buffer
204  *                becomes empty, or the destination buffer becomes full, or the entire stream is
205  *                processed, or an error is encountered.
206  *  @return       When encoding, COMPRESSION_STATUS_END is returned only if all input has been
207  *                read from the source, all output (including an end-of-stream marker) has been
208  *                written to the destination, and the COMPRESSION_STREAM_FINALIZE bit is set.
209  *
210  *                When decoding, COMPRESSION_STATUS_END is returned only if all input (including
211  *                an end-of-stream marker) has been read from the source, and all output has
212  *                been written to the destination.
213  *
214  *                COMPRESSION_STATUS_OK is returned if all data in the source buffer is consumed,
215  *                or all space in the destination buffer is used. In that case, further calls
216  *                to compression_stream_process are expected, providing more data in the source
217  *                buffer, or more space in the destination buffer.
218  *
219  *                COMPRESSION_STATUS_ERROR is returned if an error is encountered (if the
220  *                encoded data is corrupted, for example).
221  *
222  *                When decoding a valid stream, the end of stream will be detected from the contents
223  *                of the input, and COMPRESSION_STATUS_END will be returned in that case, even if
224  *                COMPRESSION_STREAM_FINALIZE is not set, or more input is provided.
225  *
226  *                When decoding a corrupted or truncated stream, if COMPRESSION_STREAM_FINALIZE is not
227  *                set to notify the decoder that no more input is coming, the decoder will not consume
228  *                or produce any data, and will return COMPRESSION_STATUS_OK.  In that case, the client code
229  *                will call compression_stream_process again with the same state, entering an infinite loop.
230  *                To avoid this, it is strongly advised to always set COMPRESSION_STREAM_FINALIZE when
231  *                no more input is expected, for both encoding and decoding.
232  */
233 typedef compression_status_t (*compression_stream_process_proc)
234 (compression_stream_t* stream, int flags);
235 
236 /*!
237  *  @abstract   Identify the compression algorithm from the first 4 bytes of compressed data.
238  *  @param data Points to 4 bytes at the beginning of the compressed data.
239  *  @discussion This call identifies the compression algorithm used to generate the given data bytes.
240  *  @return     A valid compression_algorithm_t value on success, or -1 if the data bytes do not correspond to any supported algorithm.
241  */
242 typedef int (*compression_stream_identify_algorithm_proc)
243 (const uint8_t* data);
244 
245 typedef struct{ /* Table of function pointers that make up the compression interface passed to compression_interface_register */
246 	compression_stream_init_proc               compression_stream_init;               /* see compression_stream_init_proc */
247 	compression_stream_reinit_proc             compression_stream_reinit;             /* see compression_stream_reinit_proc */
248 	compression_stream_destroy_proc            compression_stream_destroy;            /* see compression_stream_destroy_proc */
249 	compression_stream_process_proc            compression_stream_process;            /* see compression_stream_process_proc */
250 	compression_stream_identify_algorithm_proc compression_stream_identify_algorithm; /* see compression_stream_identify_algorithm_proc */
251 } compression_ki_t;
252 
253 __BEGIN_DECLS
254 
255 /**
256  * @abstract The compression interface that was registered (see compression_interface_register).
257  */
258 extern const compression_ki_t* compression_ki_ptr; /* NOTE(review): value before registration is not documented here - presumably NULL; confirm */
259 
260 /**
261  * @abstract   Registers the compression kext interface for use within the kernel proper.
262  * @param ki   The interface to register.  Declared OS_NONNULL1, so it must not be NULL.
263  * @discussion This routine may only be called once and must be called before late-const has been applied to kernel memory.
264  */
265 OS_EXPORT OS_NONNULL1
266 void compression_interface_register(const compression_ki_t *ki);
267 
268 #if PRIVATE
269 
270 typedef void (*registration_callback_t)(void); /* Callback that takes no arguments and returns nothing */
271 
272 void compression_interface_set_registration_callback(registration_callback_t callback); /* NOTE(review): presumably the callback runs when an interface is registered - confirm against the implementation */
273 
274 #endif /* PRIVATE */
275 
276 __END_DECLS
277 
278 #endif // __COMPRESSION_H
279