1*1031c584SApple OSS Distributions /* 2*1031c584SApple OSS Distributions * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 3*1031c584SApple OSS Distributions * 4*1031c584SApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5*1031c584SApple OSS Distributions * 6*1031c584SApple OSS Distributions * This file contains Original Code and/or Modifications of Original Code 7*1031c584SApple OSS Distributions * as defined in and that are subject to the Apple Public Source License 8*1031c584SApple OSS Distributions * Version 2.0 (the 'License'). You may not use this file except in 9*1031c584SApple OSS Distributions * compliance with the License. The rights granted to you under the License 10*1031c584SApple OSS Distributions * may not be used to create, or enable the creation or redistribution of, 11*1031c584SApple OSS Distributions * unlawful or unlicensed copies of an Apple operating system, or to 12*1031c584SApple OSS Distributions * circumvent, violate, or enable the circumvention or violation of, any 13*1031c584SApple OSS Distributions * terms of an Apple operating system software license agreement. 14*1031c584SApple OSS Distributions * 15*1031c584SApple OSS Distributions * Please obtain a copy of the License at 16*1031c584SApple OSS Distributions * http://www.opensource.apple.com/apsl/ and read it before using this file. 17*1031c584SApple OSS Distributions * 18*1031c584SApple OSS Distributions * The Original Code and all software distributed under the License are 19*1031c584SApple OSS Distributions * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20*1031c584SApple OSS Distributions * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21*1031c584SApple OSS Distributions * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22*1031c584SApple OSS Distributions * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23*1031c584SApple OSS Distributions * Please see the License for the specific language governing rights and 24*1031c584SApple OSS Distributions * limitations under the License. 25*1031c584SApple OSS Distributions * 26*1031c584SApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27*1031c584SApple OSS Distributions */ 28*1031c584SApple OSS Distributions 29*1031c584SApple OSS Distributions #ifndef _SYS_UTFCONV_H_ 30*1031c584SApple OSS Distributions #define _SYS_UTFCONV_H_ 31*1031c584SApple OSS Distributions 32*1031c584SApple OSS Distributions #include <sys/appleapiopts.h> 33*1031c584SApple OSS Distributions #include <sys/cdefs.h> 34*1031c584SApple OSS Distributions 35*1031c584SApple OSS Distributions #ifdef KERNEL 36*1031c584SApple OSS Distributions #ifdef __APPLE_API_UNSTABLE 37*1031c584SApple OSS Distributions 38*1031c584SApple OSS Distributions /* 39*1031c584SApple OSS Distributions * UTF-8 encode/decode flags 40*1031c584SApple OSS Distributions */ 41*1031c584SApple OSS Distributions #define UTF_REVERSE_ENDIAN 0x0001 /* reverse UCS-2 byte order */ 42*1031c584SApple OSS Distributions #define UTF_NO_NULL_TERM 0x0002 /* do not add null termination */ 43*1031c584SApple OSS Distributions #define UTF_DECOMPOSED 0x0004 /* generate fully decomposed UCS-2 */ 44*1031c584SApple OSS Distributions #define UTF_PRECOMPOSED 0x0008 /* generate precomposed UCS-2 */ 45*1031c584SApple OSS Distributions #define UTF_ESCAPE_ILLEGAL 0x0010 /* escape illegal UTF-8 */ 46*1031c584SApple OSS Distributions #define UTF_SFM_CONVERSIONS 0x0020 /* Use SFM mappings for illegal NTFS chars */ 47*1031c584SApple OSS Distributions 48*1031c584SApple OSS Distributions #define UTF_BIG_ENDIAN \ 49*1031c584SApple OSS Distributions ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 50*1031c584SApple OSS Distributions 51*1031c584SApple OSS Distributions #define UTF_LITTLE_ENDIAN \ 52*1031c584SApple OSS Distributions ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 53*1031c584SApple OSS Distributions 54*1031c584SApple OSS Distributions __BEGIN_DECLS 55*1031c584SApple OSS Distributions 56*1031c584SApple OSS Distributions 57*1031c584SApple OSS Distributions /* 58*1031c584SApple OSS Distributions * unicode_combinable - Test for a combining unicode character. 59*1031c584SApple OSS Distributions * 60*1031c584SApple OSS Distributions * This function is similar to __CFUniCharIsNonBaseCharacter except 61*1031c584SApple OSS Distributions * that it also includes Hangul Jamo characters. 62*1031c584SApple OSS Distributions */ 63*1031c584SApple OSS Distributions 64*1031c584SApple OSS Distributions int unicode_combinable(u_int16_t character); 65*1031c584SApple OSS Distributions 66*1031c584SApple OSS Distributions /* 67*1031c584SApple OSS Distributions * Test for a precomposed character. 68*1031c584SApple OSS Distributions * 69*1031c584SApple OSS Distributions * Similar to __CFUniCharIsDecomposableCharacter. 70*1031c584SApple OSS Distributions */ 71*1031c584SApple OSS Distributions 72*1031c584SApple OSS Distributions int unicode_decomposeable(u_int16_t character); 73*1031c584SApple OSS Distributions 74*1031c584SApple OSS Distributions 75*1031c584SApple OSS Distributions /* 76*1031c584SApple OSS Distributions * utf8_encodelen - Calculate the UTF-8 encoding length 77*1031c584SApple OSS Distributions * 78*1031c584SApple OSS Distributions * This function takes an Unicode input string, ucsp, of ucslen bytes 79*1031c584SApple OSS Distributions * and calculates the size of the UTF-8 output in bytes (not including 80*1031c584SApple OSS Distributions * a NULL termination byte). The string must reside in kernel memory. 81*1031c584SApple OSS Distributions * 82*1031c584SApple OSS Distributions * FLAGS 83*1031c584SApple OSS Distributions * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime 84*1031c584SApple OSS Distributions * 85*1031c584SApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 86*1031c584SApple OSS Distributions * 87*1031c584SApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 88*1031c584SApple OSS Distributions * 89*1031c584SApple OSS Distributions * UTF_DECOMPOSED: assume fully decomposed output 90*1031c584SApple OSS Distributions * 91*1031c584SApple OSS Distributions * ERRORS 92*1031c584SApple OSS Distributions * None 93*1031c584SApple OSS Distributions */ 94*1031c584SApple OSS Distributions size_t 95*1031c584SApple OSS Distributions utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, 96*1031c584SApple OSS Distributions int flags); 97*1031c584SApple OSS Distributions 98*1031c584SApple OSS Distributions 99*1031c584SApple OSS Distributions /* 100*1031c584SApple OSS Distributions * utf8_encodestr - Encodes a Unicode string into UTF-8 101*1031c584SApple OSS Distributions * 102*1031c584SApple OSS Distributions * This function takes an Unicode input string, ucsp, of ucslen bytes 103*1031c584SApple OSS Distributions * and produces the UTF-8 output into a buffer of buflen bytes pointed 104*1031c584SApple OSS Distributions * to by utf8p. The size of the output in bytes (not including a NULL 105*1031c584SApple OSS Distributions * termination byte) is returned in utf8len. The UTF-8 string output 106*1031c584SApple OSS Distributions * is NULL terminated. Both buffers must reside in kernel memory. 107*1031c584SApple OSS Distributions * 108*1031c584SApple OSS Distributions * If '/' chars are possible in the Unicode input then an alternate 109*1031c584SApple OSS Distributions * (replacement) char must be provided in altslash. 110*1031c584SApple OSS Distributions * 111*1031c584SApple OSS Distributions * FLAGS 112*1031c584SApple OSS Distributions * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime 113*1031c584SApple OSS Distributions * 114*1031c584SApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 115*1031c584SApple OSS Distributions * 116*1031c584SApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 117*1031c584SApple OSS Distributions * 118*1031c584SApple OSS Distributions * UTF_NO_NULL_TERM: do not add null termination to output string 119*1031c584SApple OSS Distributions * 120*1031c584SApple OSS Distributions * UTF_DECOMPOSED: generate fully decomposed output 121*1031c584SApple OSS Distributions * 122*1031c584SApple OSS Distributions * ERRORS 123*1031c584SApple OSS Distributions * ENAMETOOLONG: output did not fit; only utf8len bytes were encoded 124*1031c584SApple OSS Distributions * 125*1031c584SApple OSS Distributions * EINVAL: illegal Unicode char encountered 126*1031c584SApple OSS Distributions */ 127*1031c584SApple OSS Distributions int 128*1031c584SApple OSS Distributions utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, 129*1031c584SApple OSS Distributions size_t * utf8len, size_t buflen, u_int16_t altslash, int flags); 130*1031c584SApple OSS Distributions 131*1031c584SApple OSS Distributions 132*1031c584SApple OSS Distributions /* 133*1031c584SApple OSS Distributions * utf8_decodestr - Decodes a UTF-8 string into Unicode 134*1031c584SApple OSS Distributions * 135*1031c584SApple OSS Distributions * This function takes an UTF-8 input string, utf8p, of utf8len bytes 136*1031c584SApple OSS Distributions * and produces the Unicode output into a buffer of buflen bytes pointed 137*1031c584SApple OSS Distributions * to by ucsp. The size of the output in bytes (not including a NULL 138*1031c584SApple OSS Distributions * termination byte) is returned in ucslen. Both buffers must reside 139*1031c584SApple OSS Distributions * in kernel memory. 140*1031c584SApple OSS Distributions * 141*1031c584SApple OSS Distributions * If '/' chars are allowed in the Unicode output then an alternate 142*1031c584SApple OSS Distributions * (replacement) char must be provided in altslash. 143*1031c584SApple OSS Distributions * 144*1031c584SApple OSS Distributions * FLAGS 145*1031c584SApple OSS Distributions * UTF_REV_ENDIAN: Unicode byte order is opposite current runtime 146*1031c584SApple OSS Distributions * 147*1031c584SApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 148*1031c584SApple OSS Distributions * 149*1031c584SApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 150*1031c584SApple OSS Distributions * 151*1031c584SApple OSS Distributions * UTF_DECOMPOSED: generate fully decomposed output (NFD) 152*1031c584SApple OSS Distributions * 153*1031c584SApple OSS Distributions * UTF_PRECOMPOSED: generate precomposed output (NFC) 154*1031c584SApple OSS Distributions * 155*1031c584SApple OSS Distributions * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input 156*1031c584SApple OSS Distributions * 157*1031c584SApple OSS Distributions * ERRORS 158*1031c584SApple OSS Distributions * ENAMETOOLONG: output did not fit; only ucslen bytes were decoded. 159*1031c584SApple OSS Distributions * 160*1031c584SApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered. 161*1031c584SApple OSS Distributions */ 162*1031c584SApple OSS Distributions int 163*1031c584SApple OSS Distributions utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, 164*1031c584SApple OSS Distributions size_t *ucslen, size_t buflen, u_int16_t altslash, int flags); 165*1031c584SApple OSS Distributions 166*1031c584SApple OSS Distributions 167*1031c584SApple OSS Distributions /* 168*1031c584SApple OSS Distributions * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD) 169*1031c584SApple OSS Distributions * 170*1031c584SApple OSS Distributions * This function takes an UTF-8 input string, instr, of inlen bytes 171*1031c584SApple OSS Distributions * and produces normalized UTF-8 output into a buffer of buflen bytes 172*1031c584SApple OSS Distributions * pointed to by outstr. The size of the output in bytes (not including 173*1031c584SApple OSS Distributions * a NULL termination byte) is returned in outlen. In-place conversions 174*1031c584SApple OSS Distributions * are not supported (i.e. instr != outstr). Both buffers must reside 175*1031c584SApple OSS Distributions * in kernel memory. 176*1031c584SApple OSS Distributions * 177*1031c584SApple OSS Distributions * FLAGS 178*1031c584SApple OSS Distributions * UTF_DECOMPOSED: output string will be fully decomposed (NFD) 179*1031c584SApple OSS Distributions * 180*1031c584SApple OSS Distributions * UTF_PRECOMPOSED: output string will be precomposed (NFC) 181*1031c584SApple OSS Distributions * 182*1031c584SApple OSS Distributions * UTF_NO_NULL_TERM: do not add null termination to output string 183*1031c584SApple OSS Distributions * 184*1031c584SApple OSS Distributions * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input 185*1031c584SApple OSS Distributions * 186*1031c584SApple OSS Distributions * ERRORS 187*1031c584SApple OSS Distributions * ENAMETOOLONG: output did not fit or input exceeded MAXPATHLEN bytes 188*1031c584SApple OSS Distributions * 189*1031c584SApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered or invalid flags 190*1031c584SApple OSS Distributions */ 191*1031c584SApple OSS Distributions int 192*1031c584SApple OSS Distributions utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, 193*1031c584SApple OSS Distributions size_t *outlen, size_t buflen, int flags); 194*1031c584SApple OSS Distributions 195*1031c584SApple OSS Distributions 196*1031c584SApple OSS Distributions /* 197*1031c584SApple OSS Distributions * utf8_validatestr - validates a UTF-8 string 198*1031c584SApple OSS Distributions * 199*1031c584SApple OSS Distributions * This function takes an UTF-8 input string, utf8p, of utf8len bytes 200*1031c584SApple OSS Distributions * and determines if its valid UTF-8. The string must reside in kernel 201*1031c584SApple OSS Distributions * memory. 202*1031c584SApple OSS Distributions * 203*1031c584SApple OSS Distributions * ERRORS 204*1031c584SApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered. 205*1031c584SApple OSS Distributions */ 206*1031c584SApple OSS Distributions int 207*1031c584SApple OSS Distributions utf8_validatestr(const u_int8_t* utf8p, size_t utf8len); 208*1031c584SApple OSS Distributions 209*1031c584SApple OSS Distributions 210*1031c584SApple OSS Distributions __END_DECLS 211*1031c584SApple OSS Distributions 212*1031c584SApple OSS Distributions #endif /* __APPLE_API_UNSTABLE */ 213*1031c584SApple OSS Distributions #endif /* KERNEL */ 214*1031c584SApple OSS Distributions 215*1031c584SApple OSS Distributions #endif /* !_SYS_UTFCONV_H_ */ 216