1*4d495c6eSApple OSS Distributions /* 2*4d495c6eSApple OSS Distributions * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 3*4d495c6eSApple OSS Distributions * 4*4d495c6eSApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5*4d495c6eSApple OSS Distributions * 6*4d495c6eSApple OSS Distributions * This file contains Original Code and/or Modifications of Original Code 7*4d495c6eSApple OSS Distributions * as defined in and that are subject to the Apple Public Source License 8*4d495c6eSApple OSS Distributions * Version 2.0 (the 'License'). You may not use this file except in 9*4d495c6eSApple OSS Distributions * compliance with the License. The rights granted to you under the License 10*4d495c6eSApple OSS Distributions * may not be used to create, or enable the creation or redistribution of, 11*4d495c6eSApple OSS Distributions * unlawful or unlicensed copies of an Apple operating system, or to 12*4d495c6eSApple OSS Distributions * circumvent, violate, or enable the circumvention or violation of, any 13*4d495c6eSApple OSS Distributions * terms of an Apple operating system software license agreement. 14*4d495c6eSApple OSS Distributions * 15*4d495c6eSApple OSS Distributions * Please obtain a copy of the License at 16*4d495c6eSApple OSS Distributions * http://www.opensource.apple.com/apsl/ and read it before using this file. 17*4d495c6eSApple OSS Distributions * 18*4d495c6eSApple OSS Distributions * The Original Code and all software distributed under the License are 19*4d495c6eSApple OSS Distributions * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20*4d495c6eSApple OSS Distributions * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21*4d495c6eSApple OSS Distributions * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22*4d495c6eSApple OSS Distributions * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23*4d495c6eSApple OSS Distributions * Please see the License for the specific language governing rights and 24*4d495c6eSApple OSS Distributions * limitations under the License. 25*4d495c6eSApple OSS Distributions * 26*4d495c6eSApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27*4d495c6eSApple OSS Distributions */ 28*4d495c6eSApple OSS Distributions 29*4d495c6eSApple OSS Distributions #ifndef _SYS_UTFCONV_H_ 30*4d495c6eSApple OSS Distributions #define _SYS_UTFCONV_H_ 31*4d495c6eSApple OSS Distributions 32*4d495c6eSApple OSS Distributions #include <sys/appleapiopts.h> 33*4d495c6eSApple OSS Distributions #include <sys/cdefs.h> 34*4d495c6eSApple OSS Distributions 35*4d495c6eSApple OSS Distributions #ifdef KERNEL 36*4d495c6eSApple OSS Distributions #ifdef __APPLE_API_UNSTABLE 37*4d495c6eSApple OSS Distributions 38*4d495c6eSApple OSS Distributions /* 39*4d495c6eSApple OSS Distributions * UTF-8 encode/decode flags 40*4d495c6eSApple OSS Distributions */ 41*4d495c6eSApple OSS Distributions #define UTF_REVERSE_ENDIAN 0x0001 /* reverse UCS-2 byte order */ 42*4d495c6eSApple OSS Distributions #define UTF_NO_NULL_TERM 0x0002 /* do not add null termination */ 43*4d495c6eSApple OSS Distributions #define UTF_DECOMPOSED 0x0004 /* generate fully decomposed UCS-2 */ 44*4d495c6eSApple OSS Distributions #define UTF_PRECOMPOSED 0x0008 /* generate precomposed UCS-2 */ 45*4d495c6eSApple OSS Distributions #define UTF_ESCAPE_ILLEGAL 0x0010 /* escape illegal UTF-8 */ 46*4d495c6eSApple OSS Distributions #define UTF_SFM_CONVERSIONS 0x0020 /* Use SFM mappings for illegal NTFS chars */ 47*4d495c6eSApple OSS Distributions 48*4d495c6eSApple OSS Distributions #define UTF_BIG_ENDIAN \ 49*4d495c6eSApple OSS Distributions ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 50*4d495c6eSApple OSS Distributions 51*4d495c6eSApple OSS Distributions #define UTF_LITTLE_ENDIAN \ 52*4d495c6eSApple OSS Distributions ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 53*4d495c6eSApple OSS Distributions 54*4d495c6eSApple OSS Distributions __BEGIN_DECLS 55*4d495c6eSApple OSS Distributions 56*4d495c6eSApple OSS Distributions 57*4d495c6eSApple OSS Distributions /* 58*4d495c6eSApple OSS Distributions * unicode_combinable - Test for a combining unicode character. 59*4d495c6eSApple OSS Distributions * 60*4d495c6eSApple OSS Distributions * This function is similar to __CFUniCharIsNonBaseCharacter except 61*4d495c6eSApple OSS Distributions * that it also includes Hangul Jamo characters. 62*4d495c6eSApple OSS Distributions */ 63*4d495c6eSApple OSS Distributions 64*4d495c6eSApple OSS Distributions int unicode_combinable(u_int16_t character); 65*4d495c6eSApple OSS Distributions 66*4d495c6eSApple OSS Distributions /* 67*4d495c6eSApple OSS Distributions * Test for a precomposed character. 68*4d495c6eSApple OSS Distributions * 69*4d495c6eSApple OSS Distributions * Similar to __CFUniCharIsDecomposableCharacter. 70*4d495c6eSApple OSS Distributions */ 71*4d495c6eSApple OSS Distributions 72*4d495c6eSApple OSS Distributions int unicode_decomposeable(u_int16_t character); 73*4d495c6eSApple OSS Distributions 74*4d495c6eSApple OSS Distributions 75*4d495c6eSApple OSS Distributions /* 76*4d495c6eSApple OSS Distributions * utf8_encodelen - Calculate the UTF-8 encoding length 77*4d495c6eSApple OSS Distributions * 78*4d495c6eSApple OSS Distributions * This function takes an Unicode input string, ucsp, of ucslen bytes 79*4d495c6eSApple OSS Distributions * and calculates the size of the UTF-8 output in bytes (not including 80*4d495c6eSApple OSS Distributions * a NULL termination byte). The string must reside in kernel memory. 81*4d495c6eSApple OSS Distributions * 82*4d495c6eSApple OSS Distributions * FLAGS 83*4d495c6eSApple OSS Distributions * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime 84*4d495c6eSApple OSS Distributions * 85*4d495c6eSApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 86*4d495c6eSApple OSS Distributions * 87*4d495c6eSApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 88*4d495c6eSApple OSS Distributions * 89*4d495c6eSApple OSS Distributions * UTF_DECOMPOSED: assume fully decomposed output 90*4d495c6eSApple OSS Distributions * 91*4d495c6eSApple OSS Distributions * ERRORS 92*4d495c6eSApple OSS Distributions * None 93*4d495c6eSApple OSS Distributions */ 94*4d495c6eSApple OSS Distributions size_t 95*4d495c6eSApple OSS Distributions utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, 96*4d495c6eSApple OSS Distributions int flags); 97*4d495c6eSApple OSS Distributions 98*4d495c6eSApple OSS Distributions 99*4d495c6eSApple OSS Distributions /* 100*4d495c6eSApple OSS Distributions * utf8_encodestr - Encodes a Unicode string into UTF-8 101*4d495c6eSApple OSS Distributions * 102*4d495c6eSApple OSS Distributions * This function takes an Unicode input string, ucsp, of ucslen bytes 103*4d495c6eSApple OSS Distributions * and produces the UTF-8 output into a buffer of buflen bytes pointed 104*4d495c6eSApple OSS Distributions * to by utf8p. The size of the output in bytes (not including a NULL 105*4d495c6eSApple OSS Distributions * termination byte) is returned in utf8len. The UTF-8 string output 106*4d495c6eSApple OSS Distributions * is NULL terminated. Both buffers must reside in kernel memory. 107*4d495c6eSApple OSS Distributions * 108*4d495c6eSApple OSS Distributions * If '/' chars are possible in the Unicode input then an alternate 109*4d495c6eSApple OSS Distributions * (replacement) char must be provided in altslash. 110*4d495c6eSApple OSS Distributions * 111*4d495c6eSApple OSS Distributions * FLAGS 112*4d495c6eSApple OSS Distributions * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime 113*4d495c6eSApple OSS Distributions * 114*4d495c6eSApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 115*4d495c6eSApple OSS Distributions * 116*4d495c6eSApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 117*4d495c6eSApple OSS Distributions * 118*4d495c6eSApple OSS Distributions * UTF_NO_NULL_TERM: do not add null termination to output string 119*4d495c6eSApple OSS Distributions * 120*4d495c6eSApple OSS Distributions * UTF_DECOMPOSED: generate fully decomposed output 121*4d495c6eSApple OSS Distributions * 122*4d495c6eSApple OSS Distributions * ERRORS 123*4d495c6eSApple OSS Distributions * ENAMETOOLONG: output did not fit; only utf8len bytes were encoded 124*4d495c6eSApple OSS Distributions * 125*4d495c6eSApple OSS Distributions * EINVAL: illegal Unicode char encountered 126*4d495c6eSApple OSS Distributions */ 127*4d495c6eSApple OSS Distributions int 128*4d495c6eSApple OSS Distributions utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, 129*4d495c6eSApple OSS Distributions size_t * utf8len, size_t buflen, u_int16_t altslash, int flags); 130*4d495c6eSApple OSS Distributions 131*4d495c6eSApple OSS Distributions 132*4d495c6eSApple OSS Distributions /* 133*4d495c6eSApple OSS Distributions * utf8_decodestr - Decodes a UTF-8 string into Unicode 134*4d495c6eSApple OSS Distributions * 135*4d495c6eSApple OSS Distributions * This function takes an UTF-8 input string, utf8p, of utf8len bytes 136*4d495c6eSApple OSS Distributions * and produces the Unicode output into a buffer of buflen bytes pointed 137*4d495c6eSApple OSS Distributions * to by ucsp. The size of the output in bytes (not including a NULL 138*4d495c6eSApple OSS Distributions * termination byte) is returned in ucslen. Both buffers must reside 139*4d495c6eSApple OSS Distributions * in kernel memory. 140*4d495c6eSApple OSS Distributions * 141*4d495c6eSApple OSS Distributions * If '/' chars are allowed in the Unicode output then an alternate 142*4d495c6eSApple OSS Distributions * (replacement) char must be provided in altslash. 143*4d495c6eSApple OSS Distributions * 144*4d495c6eSApple OSS Distributions * FLAGS 145*4d495c6eSApple OSS Distributions * UTF_REV_ENDIAN: Unicode byte order is opposite current runtime 146*4d495c6eSApple OSS Distributions * 147*4d495c6eSApple OSS Distributions * UTF_BIG_ENDIAN: Unicode byte order is always big endian 148*4d495c6eSApple OSS Distributions * 149*4d495c6eSApple OSS Distributions * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian 150*4d495c6eSApple OSS Distributions * 151*4d495c6eSApple OSS Distributions * UTF_DECOMPOSED: generate fully decomposed output (NFD) 152*4d495c6eSApple OSS Distributions * 153*4d495c6eSApple OSS Distributions * UTF_PRECOMPOSED: generate precomposed output (NFC) 154*4d495c6eSApple OSS Distributions * 155*4d495c6eSApple OSS Distributions * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input 156*4d495c6eSApple OSS Distributions * 157*4d495c6eSApple OSS Distributions * ERRORS 158*4d495c6eSApple OSS Distributions * ENAMETOOLONG: output did not fit; only ucslen bytes were decoded. 159*4d495c6eSApple OSS Distributions * 160*4d495c6eSApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered. 161*4d495c6eSApple OSS Distributions */ 162*4d495c6eSApple OSS Distributions int 163*4d495c6eSApple OSS Distributions utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, 164*4d495c6eSApple OSS Distributions size_t *ucslen, size_t buflen, u_int16_t altslash, int flags); 165*4d495c6eSApple OSS Distributions 166*4d495c6eSApple OSS Distributions 167*4d495c6eSApple OSS Distributions /* 168*4d495c6eSApple OSS Distributions * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD) 169*4d495c6eSApple OSS Distributions * 170*4d495c6eSApple OSS Distributions * This function takes an UTF-8 input string, instr, of inlen bytes 171*4d495c6eSApple OSS Distributions * and produces normalized UTF-8 output into a buffer of buflen bytes 172*4d495c6eSApple OSS Distributions * pointed to by outstr. The size of the output in bytes (not including 173*4d495c6eSApple OSS Distributions * a NULL termination byte) is returned in outlen. In-place conversions 174*4d495c6eSApple OSS Distributions * are not supported (i.e. instr != outstr). Both buffers must reside 175*4d495c6eSApple OSS Distributions * in kernel memory. 176*4d495c6eSApple OSS Distributions * 177*4d495c6eSApple OSS Distributions * FLAGS 178*4d495c6eSApple OSS Distributions * UTF_DECOMPOSED: output string will be fully decomposed (NFD) 179*4d495c6eSApple OSS Distributions * 180*4d495c6eSApple OSS Distributions * UTF_PRECOMPOSED: output string will be precomposed (NFC) 181*4d495c6eSApple OSS Distributions * 182*4d495c6eSApple OSS Distributions * UTF_NO_NULL_TERM: do not add null termination to output string 183*4d495c6eSApple OSS Distributions * 184*4d495c6eSApple OSS Distributions * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input 185*4d495c6eSApple OSS Distributions * 186*4d495c6eSApple OSS Distributions * ERRORS 187*4d495c6eSApple OSS Distributions * ENAMETOOLONG: output did not fit or input exceeded MAXPATHLEN bytes 188*4d495c6eSApple OSS Distributions * 189*4d495c6eSApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered or invalid flags 190*4d495c6eSApple OSS Distributions */ 191*4d495c6eSApple OSS Distributions int 192*4d495c6eSApple OSS Distributions utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, 193*4d495c6eSApple OSS Distributions size_t *outlen, size_t buflen, int flags); 194*4d495c6eSApple OSS Distributions 195*4d495c6eSApple OSS Distributions 196*4d495c6eSApple OSS Distributions /* 197*4d495c6eSApple OSS Distributions * utf8_validatestr - validates a UTF-8 string 198*4d495c6eSApple OSS Distributions * 199*4d495c6eSApple OSS Distributions * This function takes an UTF-8 input string, utf8p, of utf8len bytes 200*4d495c6eSApple OSS Distributions * and determines if its valid UTF-8. The string must reside in kernel 201*4d495c6eSApple OSS Distributions * memory. 202*4d495c6eSApple OSS Distributions * 203*4d495c6eSApple OSS Distributions * ERRORS 204*4d495c6eSApple OSS Distributions * EINVAL: illegal UTF-8 sequence encountered. 205*4d495c6eSApple OSS Distributions */ 206*4d495c6eSApple OSS Distributions int 207*4d495c6eSApple OSS Distributions utf8_validatestr(const u_int8_t* utf8p, size_t utf8len); 208*4d495c6eSApple OSS Distributions 209*4d495c6eSApple OSS Distributions 210*4d495c6eSApple OSS Distributions __END_DECLS 211*4d495c6eSApple OSS Distributions 212*4d495c6eSApple OSS Distributions #endif /* __APPLE_API_UNSTABLE */ 213*4d495c6eSApple OSS Distributions #endif /* KERNEL */ 214*4d495c6eSApple OSS Distributions 215*4d495c6eSApple OSS Distributions #endif /* !_SYS_UTFCONV_H_ */ 216