xref: /xnu-10002.61.3/bsd/sys/utfconv.h (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1*0f4c859eSApple OSS Distributions /*
2*0f4c859eSApple OSS Distributions  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3*0f4c859eSApple OSS Distributions  *
4*0f4c859eSApple OSS Distributions  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5*0f4c859eSApple OSS Distributions  *
6*0f4c859eSApple OSS Distributions  * This file contains Original Code and/or Modifications of Original Code
7*0f4c859eSApple OSS Distributions  * as defined in and that are subject to the Apple Public Source License
8*0f4c859eSApple OSS Distributions  * Version 2.0 (the 'License'). You may not use this file except in
9*0f4c859eSApple OSS Distributions  * compliance with the License. The rights granted to you under the License
10*0f4c859eSApple OSS Distributions  * may not be used to create, or enable the creation or redistribution of,
11*0f4c859eSApple OSS Distributions  * unlawful or unlicensed copies of an Apple operating system, or to
12*0f4c859eSApple OSS Distributions  * circumvent, violate, or enable the circumvention or violation of, any
13*0f4c859eSApple OSS Distributions  * terms of an Apple operating system software license agreement.
14*0f4c859eSApple OSS Distributions  *
15*0f4c859eSApple OSS Distributions  * Please obtain a copy of the License at
16*0f4c859eSApple OSS Distributions  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17*0f4c859eSApple OSS Distributions  *
18*0f4c859eSApple OSS Distributions  * The Original Code and all software distributed under the License are
19*0f4c859eSApple OSS Distributions  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20*0f4c859eSApple OSS Distributions  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21*0f4c859eSApple OSS Distributions  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22*0f4c859eSApple OSS Distributions  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23*0f4c859eSApple OSS Distributions  * Please see the License for the specific language governing rights and
24*0f4c859eSApple OSS Distributions  * limitations under the License.
25*0f4c859eSApple OSS Distributions  *
26*0f4c859eSApple OSS Distributions  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27*0f4c859eSApple OSS Distributions  */
28*0f4c859eSApple OSS Distributions 
29*0f4c859eSApple OSS Distributions #ifndef _SYS_UTFCONV_H_
30*0f4c859eSApple OSS Distributions #define _SYS_UTFCONV_H_
31*0f4c859eSApple OSS Distributions 
32*0f4c859eSApple OSS Distributions #include <sys/appleapiopts.h>
33*0f4c859eSApple OSS Distributions #include <sys/cdefs.h>
34*0f4c859eSApple OSS Distributions 
35*0f4c859eSApple OSS Distributions #ifdef KERNEL
36*0f4c859eSApple OSS Distributions #ifdef __APPLE_API_UNSTABLE
37*0f4c859eSApple OSS Distributions 
38*0f4c859eSApple OSS Distributions /*
39*0f4c859eSApple OSS Distributions  * UTF-8 encode/decode flags (distinct bits; OR together in the flags argument)
40*0f4c859eSApple OSS Distributions  */
41*0f4c859eSApple OSS Distributions #define UTF_REVERSE_ENDIAN   0x0001   /* reverse UCS-2 byte order */
42*0f4c859eSApple OSS Distributions #define UTF_NO_NULL_TERM     0x0002   /* do not add null termination */
43*0f4c859eSApple OSS Distributions #define UTF_DECOMPOSED       0x0004   /* generate fully decomposed UCS-2 (NFD) */
44*0f4c859eSApple OSS Distributions #define UTF_PRECOMPOSED      0x0008   /* generate precomposed UCS-2 (NFC) */
45*0f4c859eSApple OSS Distributions #define UTF_ESCAPE_ILLEGAL   0x0010   /* percent escape illegal UTF-8 */
46*0f4c859eSApple OSS Distributions #define UTF_SFM_CONVERSIONS  0x0020   /* use SFM mappings for illegal NTFS chars */
47*0f4c859eSApple OSS Distributions 
48*0f4c859eSApple OSS Distributions #define UTF_BIG_ENDIAN       \
49*0f4c859eSApple OSS Distributions 	((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) /* 0 when the host is big endian */
50*0f4c859eSApple OSS Distributions 
51*0f4c859eSApple OSS Distributions #define UTF_LITTLE_ENDIAN    \
52*0f4c859eSApple OSS Distributions 	((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) /* 0 when the host is little endian */
53*0f4c859eSApple OSS Distributions 
54*0f4c859eSApple OSS Distributions __BEGIN_DECLS
55*0f4c859eSApple OSS Distributions 
56*0f4c859eSApple OSS Distributions 
57*0f4c859eSApple OSS Distributions /*
58*0f4c859eSApple OSS Distributions  * unicode_combinable - Test for a combining Unicode character.
59*0f4c859eSApple OSS Distributions  *
60*0f4c859eSApple OSS Distributions  * This function is similar to __CFUniCharIsNonBaseCharacter except
61*0f4c859eSApple OSS Distributions  * that it also includes Hangul Jamo characters.
62*0f4c859eSApple OSS Distributions  */
63*0f4c859eSApple OSS Distributions 
64*0f4c859eSApple OSS Distributions int unicode_combinable(u_int16_t character);
65*0f4c859eSApple OSS Distributions 
66*0f4c859eSApple OSS Distributions /*
67*0f4c859eSApple OSS Distributions  * unicode_decomposeable - Test for a precomposed character.
68*0f4c859eSApple OSS Distributions  *
69*0f4c859eSApple OSS Distributions  * Similar to __CFUniCharIsDecomposableCharacter.
70*0f4c859eSApple OSS Distributions  */
71*0f4c859eSApple OSS Distributions 
72*0f4c859eSApple OSS Distributions int unicode_decomposeable(u_int16_t character);
73*0f4c859eSApple OSS Distributions 
74*0f4c859eSApple OSS Distributions 
75*0f4c859eSApple OSS Distributions /*
76*0f4c859eSApple OSS Distributions  * utf8_encodelen - Calculate the UTF-8 encoding length
77*0f4c859eSApple OSS Distributions  *
78*0f4c859eSApple OSS Distributions  * This function takes a Unicode input string, ucsp, of ucslen bytes
79*0f4c859eSApple OSS Distributions  * and calculates the size of the UTF-8 output in bytes (not including
80*0f4c859eSApple OSS Distributions  * a NULL termination byte). The string must reside in kernel memory.
81*0f4c859eSApple OSS Distributions  *
82*0f4c859eSApple OSS Distributions  * FLAGS
83*0f4c859eSApple OSS Distributions  *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime
84*0f4c859eSApple OSS Distributions  *
85*0f4c859eSApple OSS Distributions  *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
86*0f4c859eSApple OSS Distributions  *
87*0f4c859eSApple OSS Distributions  *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
88*0f4c859eSApple OSS Distributions  *
89*0f4c859eSApple OSS Distributions  *    UTF_DECOMPOSED:  assume fully decomposed output
90*0f4c859eSApple OSS Distributions  *
91*0f4c859eSApple OSS Distributions  * ERRORS
92*0f4c859eSApple OSS Distributions  *    None
93*0f4c859eSApple OSS Distributions  */
94*0f4c859eSApple OSS Distributions size_t
95*0f4c859eSApple OSS Distributions utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
96*0f4c859eSApple OSS Distributions     int flags);
97*0f4c859eSApple OSS Distributions 
98*0f4c859eSApple OSS Distributions 
99*0f4c859eSApple OSS Distributions /*
100*0f4c859eSApple OSS Distributions  * utf8_encodestr - Encodes a Unicode string into UTF-8
101*0f4c859eSApple OSS Distributions  *
102*0f4c859eSApple OSS Distributions  * This function takes a Unicode input string, ucsp, of ucslen bytes
103*0f4c859eSApple OSS Distributions  * and produces the UTF-8 output into a buffer of buflen bytes pointed
104*0f4c859eSApple OSS Distributions  * to by utf8p. The size of the output in bytes (not including a NULL
105*0f4c859eSApple OSS Distributions  * termination byte) is returned in utf8len. The output is NULL terminated
106*0f4c859eSApple OSS Distributions  * unless UTF_NO_NULL_TERM is set. Both buffers must reside in kernel memory.
107*0f4c859eSApple OSS Distributions  *
108*0f4c859eSApple OSS Distributions  * If '/' chars are possible in the Unicode input then an alternate
109*0f4c859eSApple OSS Distributions  * (replacement) char must be provided in altslash.
110*0f4c859eSApple OSS Distributions  *
111*0f4c859eSApple OSS Distributions  * FLAGS
112*0f4c859eSApple OSS Distributions  *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime
113*0f4c859eSApple OSS Distributions  *
114*0f4c859eSApple OSS Distributions  *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
115*0f4c859eSApple OSS Distributions  *
116*0f4c859eSApple OSS Distributions  *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
117*0f4c859eSApple OSS Distributions  *
118*0f4c859eSApple OSS Distributions  *    UTF_NO_NULL_TERM:  do not add null termination to output string
119*0f4c859eSApple OSS Distributions  *
120*0f4c859eSApple OSS Distributions  *    UTF_DECOMPOSED:  generate fully decomposed output
121*0f4c859eSApple OSS Distributions  *
122*0f4c859eSApple OSS Distributions  * ERRORS
123*0f4c859eSApple OSS Distributions  *    ENAMETOOLONG:  output did not fit; only utf8len bytes were encoded
124*0f4c859eSApple OSS Distributions  *
125*0f4c859eSApple OSS Distributions  *    EINVAL:  illegal Unicode char encountered
126*0f4c859eSApple OSS Distributions  */
127*0f4c859eSApple OSS Distributions int
128*0f4c859eSApple OSS Distributions utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
129*0f4c859eSApple OSS Distributions     size_t * utf8len, size_t buflen, u_int16_t altslash, int flags);
130*0f4c859eSApple OSS Distributions 
131*0f4c859eSApple OSS Distributions 
132*0f4c859eSApple OSS Distributions /*
133*0f4c859eSApple OSS Distributions  * utf8_decodestr - Decodes a UTF-8 string into Unicode
134*0f4c859eSApple OSS Distributions  *
135*0f4c859eSApple OSS Distributions  * This function takes a UTF-8 input string, utf8p, of utf8len bytes
136*0f4c859eSApple OSS Distributions  * and produces the Unicode output into a buffer of buflen bytes pointed
137*0f4c859eSApple OSS Distributions  * to by ucsp. The size of the output in bytes (not including a NULL
138*0f4c859eSApple OSS Distributions  * termination byte) is returned in ucslen. Both buffers must reside
139*0f4c859eSApple OSS Distributions  * in kernel memory.
140*0f4c859eSApple OSS Distributions  *
141*0f4c859eSApple OSS Distributions  * If '/' chars are allowed in the Unicode output then an alternate
142*0f4c859eSApple OSS Distributions  * (replacement) char must be provided in altslash.
143*0f4c859eSApple OSS Distributions  *
144*0f4c859eSApple OSS Distributions  * FLAGS
145*0f4c859eSApple OSS Distributions  *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime
146*0f4c859eSApple OSS Distributions  *
147*0f4c859eSApple OSS Distributions  *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
148*0f4c859eSApple OSS Distributions  *
149*0f4c859eSApple OSS Distributions  *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
150*0f4c859eSApple OSS Distributions  *
151*0f4c859eSApple OSS Distributions  *    UTF_DECOMPOSED:  generate fully decomposed output (NFD)
152*0f4c859eSApple OSS Distributions  *
153*0f4c859eSApple OSS Distributions  *    UTF_PRECOMPOSED:  generate precomposed output (NFC)
154*0f4c859eSApple OSS Distributions  *
155*0f4c859eSApple OSS Distributions  *    UTF_ESCAPE_ILLEGAL:  percent escape any illegal UTF-8 input
156*0f4c859eSApple OSS Distributions  *
157*0f4c859eSApple OSS Distributions  * ERRORS
158*0f4c859eSApple OSS Distributions  *    ENAMETOOLONG:  output did not fit; only ucslen bytes were decoded.
159*0f4c859eSApple OSS Distributions  *
160*0f4c859eSApple OSS Distributions  *    EINVAL:  illegal UTF-8 sequence encountered.
161*0f4c859eSApple OSS Distributions  */
162*0f4c859eSApple OSS Distributions int
163*0f4c859eSApple OSS Distributions utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
164*0f4c859eSApple OSS Distributions     size_t *ucslen, size_t buflen, u_int16_t altslash, int flags);
165*0f4c859eSApple OSS Distributions 
166*0f4c859eSApple OSS Distributions 
167*0f4c859eSApple OSS Distributions /*
168*0f4c859eSApple OSS Distributions  * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD)
169*0f4c859eSApple OSS Distributions  *
170*0f4c859eSApple OSS Distributions  * This function takes a UTF-8 input string, instr, of inlen bytes
171*0f4c859eSApple OSS Distributions  * and produces normalized UTF-8 output into a buffer of buflen bytes
172*0f4c859eSApple OSS Distributions  * pointed to by outstr. The size of the output in bytes (not including
173*0f4c859eSApple OSS Distributions  * a NULL termination byte) is returned in outlen. In-place conversions
174*0f4c859eSApple OSS Distributions  * are not supported (i.e. instr != outstr).  Both buffers must reside
175*0f4c859eSApple OSS Distributions  * in kernel memory.
176*0f4c859eSApple OSS Distributions  *
177*0f4c859eSApple OSS Distributions  * FLAGS
178*0f4c859eSApple OSS Distributions  *    UTF_DECOMPOSED:  output string will be fully decomposed (NFD)
179*0f4c859eSApple OSS Distributions  *
180*0f4c859eSApple OSS Distributions  *    UTF_PRECOMPOSED:  output string will be precomposed (NFC)
181*0f4c859eSApple OSS Distributions  *
182*0f4c859eSApple OSS Distributions  *    UTF_NO_NULL_TERM:  do not add null termination to output string
183*0f4c859eSApple OSS Distributions  *
184*0f4c859eSApple OSS Distributions  *    UTF_ESCAPE_ILLEGAL:  percent escape any illegal UTF-8 input
185*0f4c859eSApple OSS Distributions  *
186*0f4c859eSApple OSS Distributions  * ERRORS
187*0f4c859eSApple OSS Distributions  *    ENAMETOOLONG:  output did not fit or input exceeded MAXPATHLEN bytes
188*0f4c859eSApple OSS Distributions  *
189*0f4c859eSApple OSS Distributions  *    EINVAL:  illegal UTF-8 sequence encountered or invalid flags
190*0f4c859eSApple OSS Distributions  */
191*0f4c859eSApple OSS Distributions int
192*0f4c859eSApple OSS Distributions utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr,
193*0f4c859eSApple OSS Distributions     size_t *outlen, size_t buflen, int flags);
194*0f4c859eSApple OSS Distributions 
195*0f4c859eSApple OSS Distributions 
196*0f4c859eSApple OSS Distributions /*
197*0f4c859eSApple OSS Distributions  * utf8_validatestr - validates a UTF-8 string
198*0f4c859eSApple OSS Distributions  *
199*0f4c859eSApple OSS Distributions  * This function takes a UTF-8 input string, utf8p, of utf8len bytes
200*0f4c859eSApple OSS Distributions  * and determines whether it is valid UTF-8.  The string must reside in
201*0f4c859eSApple OSS Distributions  * kernel memory.
202*0f4c859eSApple OSS Distributions  *
203*0f4c859eSApple OSS Distributions  * ERRORS
204*0f4c859eSApple OSS Distributions  *    EINVAL:  illegal UTF-8 sequence encountered.
205*0f4c859eSApple OSS Distributions  */
206*0f4c859eSApple OSS Distributions int
207*0f4c859eSApple OSS Distributions utf8_validatestr(const u_int8_t* utf8p, size_t utf8len);
208*0f4c859eSApple OSS Distributions 
209*0f4c859eSApple OSS Distributions 
210*0f4c859eSApple OSS Distributions __END_DECLS
211*0f4c859eSApple OSS Distributions 
212*0f4c859eSApple OSS Distributions #endif /* __APPLE_API_UNSTABLE */
213*0f4c859eSApple OSS Distributions #endif /* KERNEL */
214*0f4c859eSApple OSS Distributions 
215*0f4c859eSApple OSS Distributions #endif /* !_SYS_UTFCONV_H_ */
216