1*0f4c859eSApple OSS Distributions /* 2*0f4c859eSApple OSS Distributions * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 3*0f4c859eSApple OSS Distributions * 4*0f4c859eSApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5*0f4c859eSApple OSS Distributions * 6*0f4c859eSApple OSS Distributions * This file contains Original Code and/or Modifications of Original Code 7*0f4c859eSApple OSS Distributions * as defined in and that are subject to the Apple Public Source License 8*0f4c859eSApple OSS Distributions * Version 2.0 (the 'License'). You may not use this file except in 9*0f4c859eSApple OSS Distributions * compliance with the License. The rights granted to you under the License 10*0f4c859eSApple OSS Distributions * may not be used to create, or enable the creation or redistribution of, 11*0f4c859eSApple OSS Distributions * unlawful or unlicensed copies of an Apple operating system, or to 12*0f4c859eSApple OSS Distributions * circumvent, violate, or enable the circumvention or violation of, any 13*0f4c859eSApple OSS Distributions * terms of an Apple operating system software license agreement. 14*0f4c859eSApple OSS Distributions * 15*0f4c859eSApple OSS Distributions * Please obtain a copy of the License at 16*0f4c859eSApple OSS Distributions * http://www.opensource.apple.com/apsl/ and read it before using this file. 17*0f4c859eSApple OSS Distributions * 18*0f4c859eSApple OSS Distributions * The Original Code and all software distributed under the License are 19*0f4c859eSApple OSS Distributions * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20*0f4c859eSApple OSS Distributions * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21*0f4c859eSApple OSS Distributions * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22*0f4c859eSApple OSS Distributions * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23*0f4c859eSApple OSS Distributions * Please see the License for the specific language governing rights and 24*0f4c859eSApple OSS Distributions * limitations under the License. 25*0f4c859eSApple OSS Distributions * 26*0f4c859eSApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27*0f4c859eSApple OSS Distributions */ 28*0f4c859eSApple OSS Distributions 29*0f4c859eSApple OSS Distributions #ifndef _SYS_UTFCONV_H_ 30*0f4c859eSApple OSS Distributions #define _SYS_UTFCONV_H_ 31*0f4c859eSApple OSS Distributions 32*0f4c859eSApple OSS Distributions #include <sys/appleapiopts.h> 33*0f4c859eSApple OSS Distributions #include <sys/cdefs.h> 34*0f4c859eSApple OSS Distributions 35*0f4c859eSApple OSS Distributions #ifdef KERNEL 36*0f4c859eSApple OSS Distributions #ifdef __APPLE_API_UNSTABLE 37*0f4c859eSApple OSS Distributions 38*0f4c859eSApple OSS Distributions /* 39*0f4c859eSApple OSS Distributions * UTF-8 encode/decode flags 40*0f4c859eSApple OSS Distributions */ 41*0f4c859eSApple OSS Distributions #define UTF_REVERSE_ENDIAN 0x0001 /* reverse UCS-2 byte order */ 42*0f4c859eSApple OSS Distributions #define UTF_NO_NULL_TERM 0x0002 /* do not add null termination */ 43*0f4c859eSApple OSS Distributions #define UTF_DECOMPOSED 0x0004 /* generate fully decomposed UCS-2 */ 44*0f4c859eSApple OSS Distributions #define UTF_PRECOMPOSED 0x0008 /* generate precomposed UCS-2 */ 45*0f4c859eSApple OSS Distributions #define UTF_ESCAPE_ILLEGAL 0x0010 /* escape illegal UTF-8 */ 46*0f4c859eSApple OSS Distributions #define UTF_SFM_CONVERSIONS 0x0020 /* Use SFM mappings for illegal NTFS chars */ 47*0f4c859eSApple OSS Distributions 48*0f4c859eSApple OSS Distributions #define UTF_BIG_ENDIAN \ 49*0f4c859eSApple OSS Distributions ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 50*0f4c859eSApple OSS Distributions 51*0f4c859eSApple OSS Distributions #define UTF_LITTLE_ENDIAN \ 52*0f4c859eSApple OSS Distributions ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 53*0f4c859eSApple OSS Distributions 54*0f4c859eSApple OSS Distributions __BEGIN_DECLS 55*0f4c859eSApple OSS Distributions 56*0f4c859eSApple OSS Distributions 57*0f4c859eSApple OSS Distributions /* 58*0f4c859eSApple OSS Distributions * unicode_combinable - Test for a combining unicode character. 59*0f4c859eSApple OSS Distributions * 60*0f4c859eSApple OSS Distributions * This function is similar to __CFUniCharIsNonBaseCharacter except 61*0f4c859eSApple OSS Distributions * that it also includes Hangul Jamo characters. 62*0f4c859eSApple OSS Distributions */ 63*0f4c859eSApple OSS Distributions 64*0f4c859eSApple OSS Distributions int unicode_combinable(u_int16_t character); 65*0f4c859eSApple OSS Distributions 66*0f4c859eSApple OSS Distributions /* 67*0f4c859eSApple OSS Distributions * Test for a precomposed character. 68*0f4c859eSApple OSS Distributions * 69*0f4c859eSApple OSS Distributions * Similar to __CFUniCharIsDecomposableCharacter. 70*0f4c859eSApple OSS Distributions */ 71*0f4c859eSApple OSS Distributions 72*0f4c859eSApple OSS Distributions int unicode_decomposeable(u_int16_t character); 73*0f4c859eSApple OSS Distributions 74*0f4c859eSApple OSS Distributions 75*0f4c859eSApple OSS Distributions /* 76*0f4c859eSApple OSS Distributions * utf8_encodelen - Calculate the UTF-8 encoding length 77*0f4c859eSApple OSS Distributions * 78*0f4c859eSApple OSS Distributions * This function takes an Unicode input string, ucsp, of ucslen bytes 79*0f4c859eSApple OSS Distributions * and calculates the size of the UTF-8 output in bytes (not including 80*0f4c859eSApple OSS Distributions * a NULL termination byte). The string must reside in kernel memory. 81*0f4c859eSApple OSS Distributions * 82*0f4c859eSApple OSS Distributions * FLAGS 83*0f4c859eSApple OSS Distributions * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime 84*0f4c859eSApple OSS Distributions * 85*0f4c859eSApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 86*0f4c859eSApple OSS Distributions * 87*0f4c859eSApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 88*0f4c859eSApple OSS Distributions * 89*0f4c859eSApple OSS Distributions * UTF_DECOMPOSED: assume fully decomposed output 90*0f4c859eSApple OSS Distributions * 91*0f4c859eSApple OSS Distributions * ERRORS 92*0f4c859eSApple OSS Distributions * None 93*0f4c859eSApple OSS Distributions */ 94*0f4c859eSApple OSS Distributions size_t 95*0f4c859eSApple OSS Distributions utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, 96*0f4c859eSApple OSS Distributions int flags); 97*0f4c859eSApple OSS Distributions 98*0f4c859eSApple OSS Distributions 99*0f4c859eSApple OSS Distributions /* 100*0f4c859eSApple OSS Distributions * utf8_encodestr - Encodes a Unicode string into UTF-8 101*0f4c859eSApple OSS Distributions * 102*0f4c859eSApple OSS Distributions * This function takes an Unicode input string, ucsp, of ucslen bytes 103*0f4c859eSApple OSS Distributions * and produces the UTF-8 output into a buffer of buflen bytes pointed 104*0f4c859eSApple OSS Distributions * to by utf8p. The size of the output in bytes (not including a NULL 105*0f4c859eSApple OSS Distributions * termination byte) is returned in utf8len. The UTF-8 string output 106*0f4c859eSApple OSS Distributions * is NULL terminated. Both buffers must reside in kernel memory. 107*0f4c859eSApple OSS Distributions * 108*0f4c859eSApple OSS Distributions * If '/' chars are possible in the Unicode input then an alternate 109*0f4c859eSApple OSS Distributions * (replacement) char must be provided in altslash. 110*0f4c859eSApple OSS Distributions * 111*0f4c859eSApple OSS Distributions * FLAGS 112*0f4c859eSApple OSS Distributions * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime 113*0f4c859eSApple OSS Distributions * 114*0f4c859eSApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 115*0f4c859eSApple OSS Distributions * 116*0f4c859eSApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 117*0f4c859eSApple OSS Distributions * 118*0f4c859eSApple OSS Distributions * UTF_NO_NULL_TERM: do not add null termination to output string 119*0f4c859eSApple OSS Distributions * 120*0f4c859eSApple OSS Distributions * UTF_DECOMPOSED: generate fully decomposed output 121*0f4c859eSApple OSS Distributions * 122*0f4c859eSApple OSS Distributions * ERRORS 123*0f4c859eSApple OSS Distributions * ENAMETOOLONG: output did not fit; only utf8len bytes were encoded 124*0f4c859eSApple OSS Distributions * 125*0f4c859eSApple OSS Distributions * EINVAL: illegal Unicode char encountered 126*0f4c859eSApple OSS Distributions */ 127*0f4c859eSApple OSS Distributions int 128*0f4c859eSApple OSS Distributions utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, 129*0f4c859eSApple OSS Distributions size_t * utf8len, size_t buflen, u_int16_t altslash, int flags); 130*0f4c859eSApple OSS Distributions 131*0f4c859eSApple OSS Distributions 132*0f4c859eSApple OSS Distributions /* 133*0f4c859eSApple OSS Distributions * utf8_decodestr - Decodes a UTF-8 string into Unicode 134*0f4c859eSApple OSS Distributions * 135*0f4c859eSApple OSS Distributions * This function takes an UTF-8 input string, utf8p, of utf8len bytes 136*0f4c859eSApple OSS Distributions * and produces the Unicode output into a buffer of buflen bytes pointed 137*0f4c859eSApple OSS Distributions * to by ucsp. The size of the output in bytes (not including a NULL 138*0f4c859eSApple OSS Distributions * termination byte) is returned in ucslen. Both buffers must reside 139*0f4c859eSApple OSS Distributions * in kernel memory. 140*0f4c859eSApple OSS Distributions * 141*0f4c859eSApple OSS Distributions * If '/' chars are allowed in the Unicode output then an alternate 142*0f4c859eSApple OSS Distributions * (replacement) char must be provided in altslash. 143*0f4c859eSApple OSS Distributions * 144*0f4c859eSApple OSS Distributions * FLAGS 145*0f4c859eSApple OSS Distributions * UTF_REV_ENDIAN: Unicode byte order is opposite current runtime 146*0f4c859eSApple OSS Distributions * 147*0f4c859eSApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 148*0f4c859eSApple OSS Distributions * 149*0f4c859eSApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 150*0f4c859eSApple OSS Distributions * 151*0f4c859eSApple OSS Distributions * UTF_DECOMPOSED: generate fully decomposed output (NFD) 152*0f4c859eSApple OSS Distributions * 153*0f4c859eSApple OSS Distributions * UTF_PRECOMPOSED: generate precomposed output (NFC) 154*0f4c859eSApple OSS Distributions * 155*0f4c859eSApple OSS Distributions * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input 156*0f4c859eSApple OSS Distributions * 157*0f4c859eSApple OSS Distributions * ERRORS 158*0f4c859eSApple OSS Distributions * ENAMETOOLONG: output did not fit; only ucslen bytes were decoded. 159*0f4c859eSApple OSS Distributions * 160*0f4c859eSApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered. 161*0f4c859eSApple OSS Distributions */ 162*0f4c859eSApple OSS Distributions int 163*0f4c859eSApple OSS Distributions utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, 164*0f4c859eSApple OSS Distributions size_t *ucslen, size_t buflen, u_int16_t altslash, int flags); 165*0f4c859eSApple OSS Distributions 166*0f4c859eSApple OSS Distributions 167*0f4c859eSApple OSS Distributions /* 168*0f4c859eSApple OSS Distributions * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD) 169*0f4c859eSApple OSS Distributions * 170*0f4c859eSApple OSS Distributions * This function takes an UTF-8 input string, instr, of inlen bytes 171*0f4c859eSApple OSS Distributions * and produces normalized UTF-8 output into a buffer of buflen bytes 172*0f4c859eSApple OSS Distributions * pointed to by outstr. The size of the output in bytes (not including 173*0f4c859eSApple OSS Distributions * a NULL termination byte) is returned in outlen. In-place conversions 174*0f4c859eSApple OSS Distributions * are not supported (i.e. instr != outstr). Both buffers must reside 175*0f4c859eSApple OSS Distributions * in kernel memory. 176*0f4c859eSApple OSS Distributions * 177*0f4c859eSApple OSS Distributions * FLAGS 178*0f4c859eSApple OSS Distributions * UTF_DECOMPOSED: output string will be fully decomposed (NFD) 179*0f4c859eSApple OSS Distributions * 180*0f4c859eSApple OSS Distributions * UTF_PRECOMPOSED: output string will be precomposed (NFC) 181*0f4c859eSApple OSS Distributions * 182*0f4c859eSApple OSS Distributions * UTF_NO_NULL_TERM: do not add null termination to output string 183*0f4c859eSApple OSS Distributions * 184*0f4c859eSApple OSS Distributions * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input 185*0f4c859eSApple OSS Distributions * 186*0f4c859eSApple OSS Distributions * ERRORS 187*0f4c859eSApple OSS Distributions * ENAMETOOLONG: output did not fit or input exceeded MAXPATHLEN bytes 188*0f4c859eSApple OSS Distributions * 189*0f4c859eSApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered or invalid flags 190*0f4c859eSApple OSS Distributions */ 191*0f4c859eSApple OSS Distributions int 192*0f4c859eSApple OSS Distributions utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, 193*0f4c859eSApple OSS Distributions size_t *outlen, size_t buflen, int flags); 194*0f4c859eSApple OSS Distributions 195*0f4c859eSApple OSS Distributions 196*0f4c859eSApple OSS Distributions /* 197*0f4c859eSApple OSS Distributions * utf8_validatestr - validates a UTF-8 string 198*0f4c859eSApple OSS Distributions * 199*0f4c859eSApple OSS Distributions * This function takes an UTF-8 input string, utf8p, of utf8len bytes 200*0f4c859eSApple OSS Distributions * and determines if its valid UTF-8. The string must reside in kernel 201*0f4c859eSApple OSS Distributions * memory. 202*0f4c859eSApple OSS Distributions * 203*0f4c859eSApple OSS Distributions * ERRORS 204*0f4c859eSApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered. 205*0f4c859eSApple OSS Distributions */ 206*0f4c859eSApple OSS Distributions int 207*0f4c859eSApple OSS Distributions utf8_validatestr(const u_int8_t* utf8p, size_t utf8len); 208*0f4c859eSApple OSS Distributions 209*0f4c859eSApple OSS Distributions 210*0f4c859eSApple OSS Distributions __END_DECLS 211*0f4c859eSApple OSS Distributions 212*0f4c859eSApple OSS Distributions #endif /* __APPLE_API_UNSTABLE */ 213*0f4c859eSApple OSS Distributions #endif /* KERNEL */ 214*0f4c859eSApple OSS Distributions 215*0f4c859eSApple OSS Distributions #endif /* !_SYS_UTFCONV_H_ */ 216