csutil/csuctransform.h
Go to the documentation of this file.00001 /* 00002 Copyright (C) 2003 by Frank Richter 00003 00004 This library is free software; you can redistribute it and/or 00005 modify it under the terms of the GNU Library General Public 00006 License as published by the Free Software Foundation; either 00007 version 2 of the License, or (at your option) any later version. 00008 00009 This library is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 Library General Public License for more details. 00013 00014 You should have received a copy of the GNU Library General Public 00015 License along with this library; if not, write to the Free 00016 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00017 */ 00018 00019 #ifndef __CS_CSUCTRANSFORM_H__ 00020 #define __CS_CSUCTRANSFORM_H__ 00021 00022 #include "csunicode.h" 00023 00024 #include "csgeom/math.h" 00025 00037 #define CS_UC_MAX_UTF8_ENCODED 4 /* 6 to encode 32 bit */ 00038 00042 #define CS_UC_MAX_UTF16_ENCODED 2 00043 00047 #define CS_UC_MAX_UTF32_ENCODED 1 00048 #if (CS_WCHAR_T_SIZE == 1) 00049 #define CS_UC_MAX_WCHAR_T_ENCODED CS_UC_MAX_UTF8_ENCODED 00050 #elif (CS_WCHAR_T_SIZE == 2) 00051 00055 #define CS_UC_MAX_WCHAR_T_ENCODED CS_UC_MAX_UTF16_ENCODED 00056 #else 00057 #define CS_UC_MAX_WCHAR_T_ENCODED CS_UC_MAX_UTF32_ENCODED 00058 #endif 00059 00063 #define CS_UC_MAX_MAPPED 3 00064 00068 enum 00069 { 00075 csUcMapSimple = (1 << 0) 00076 }; 00077 00081 class CS_CRYSTALSPACE_EXPORT csUnicodeTransform 00082 { 00083 public: 00084 #define FAIL(ret) \ 00085 { \ 00086 if (isValid) *isValid = false; \ 00087 ch = CS_UC_CHAR_REPLACER; \ 00088 return ret; \ 00089 } 00090 00091 #define SUCCEED \ 00092 if (isValid) *isValid = true; \ 00093 return chUsed; 00094 00095 #define GET_NEXT(next) \ 00096 if ((size_t)chUsed == strlen) \ 00097 { \ 00098 FAIL(chUsed); \ 00099 } \ 00100 next = *str++; \ 00101 if (next == 0) \ 00102 { \ 00103 FAIL(chUsed); \ 00104 } \ 00105 chUsed++; 00106 00125 inline static int UTF8Decode (const utf8_char* str, size_t strlen, 00126 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00127 { 00128 if (str == 0) 00129 { 00130 FAIL(0); 00131 } 00132 int chUsed = 0; 00133 00134 utf8_char curCh; 00135 GET_NEXT(curCh); 00136 if ((curCh & 0x80) == 0) 00137 { 00138 // easy case 00139 ch = curCh; 00140 SUCCEED; 00141 } 00142 else 00143 { 00144 // Count with how many bytes this char is encoded. 00145 int n = 0; 00146 while ((n < 7) && ((curCh & (1 << (7 - n))) != 0)) { n++; } 00147 00148 if ((n < 2) || (n > 6)) 00149 { 00150 // Invalid code: first char of a "sequence" must have 00151 // at least two and at most six MSBs set 00152 FAIL(1); 00153 } 00154 00155 ch = (curCh & ((1 << (8 - n)) - 1)); 00156 00157 for (int i = 1; i < n; i++) 00158 { 00159 GET_NEXT(curCh); 00160 if ((curCh & 0xc0) != 0x80) 00161 { 00162 FAIL(chUsed); 00163 } 00164 else 00165 { 00166 ch <<= 6; 00167 ch |= (curCh & 0x3f); 00168 } 00169 } 00170 00171 // Check if in Unicode range. 00172 if (ch > CS_UC_LAST_CHAR) 00173 { 00174 FAIL(chUsed); 00175 } 00176 00177 // Check for "overlong" codes. 00178 if ((ch < 0x80) && (n > 0)) 00179 { 00180 FAIL(chUsed); 00181 } 00182 else if ((ch < 0x800) && (n > 2)) 00183 { 00184 FAIL(chUsed); 00185 } 00186 else if ((ch < 0x10000) && (n > 3)) 00187 { 00188 FAIL(chUsed); 00189 } 00190 else if ((ch < 0x200000) && (n > 4)) 00191 { 00192 FAIL(chUsed); 00193 } 00194 /* 00195 else if ((ch < 0x4000000) && (n > 5)) 00196 { 00197 FAIL(chUsed); 00198 } 00199 else if ((ch < 0x80000000) && (n > 6)) 00200 { 00201 FAIL(chUsed); 00202 } 00203 */ 00204 00205 if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 00206 || CS_UC_IS_SURROGATE(ch))) 00207 FAIL(chUsed); 00208 SUCCEED; 00209 } 00210 } 00211 00216 inline static int UTF16Decode (const utf16_char* str, size_t strlen, 00217 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00218 { 00219 if (str == 0) 00220 { 00221 FAIL(0); 00222 } 00223 int chUsed = 0; 00224 00225 utf16_char curCh; 00226 GET_NEXT(curCh); 00227 // Decode surrogate 00228 if (CS_UC_IS_SURROGATE (curCh)) 00229 { 00230 // Invalid code 00231 if (!CS_UC_IS_HIGH_SURROGATE (curCh)) 00232 { 00233 FAIL(chUsed); 00234 } 00235 ch = 0x10000 + ((curCh & 0x03ff) << 10); 00236 GET_NEXT(curCh); 00237 // Invalid code 00238 if (!CS_UC_IS_LOW_SURROGATE (curCh)) 00239 { 00240 // Fail with 1 so the char is handled upon the next Decode. 00241 FAIL(1); 00242 } 00243 ch |= (curCh & 0x3ff); 00244 } 00245 else 00246 { 00247 ch = curCh; 00248 } 00249 if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 00250 || CS_UC_IS_SURROGATE(ch))) 00251 FAIL(chUsed); 00252 SUCCEED; 00253 } 00254 00259 inline static int UTF32Decode (const utf32_char* str, size_t strlen, 00260 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00261 { 00262 if (str == 0) 00263 { 00264 FAIL(0); 00265 } 00266 int chUsed = 0; 00267 00268 GET_NEXT(ch); 00269 if ((!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 00270 || CS_UC_IS_SURROGATE(ch))) || (ch > CS_UC_LAST_CHAR)) 00271 FAIL(chUsed); 00272 SUCCEED; 00273 } 00274 00279 inline static int Decode (const utf8_char* str, size_t strlen, 00280 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00281 { 00282 return UTF8Decode (str, strlen, ch, isValid, returnNonChar); 00283 } 00288 inline static int Decode (const utf16_char* str, size_t strlen, 00289 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00290 { 00291 return UTF16Decode (str, strlen, ch, isValid, returnNonChar); 00292 } 00297 inline static int Decode (const utf32_char* str, size_t strlen, 00298 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00299 { 00300 return UTF32Decode (str, strlen, ch, isValid, returnNonChar); 00301 } 00302 00304 #undef FAIL 00305 #undef SUCCEED 00306 #undef GET_NEXT 00307 00310 #define _OUTPUT_CHAR(buf, chr) \ 00311 if (bufRemaining > 0) \ 00312 { \ 00313 if(buf) *buf++ = chr; \ 00314 bufRemaining--; \ 00315 } \ 00316 encodedLen++; 00317 00318 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(buf, chr) 00319 00335 inline static int EncodeUTF8 (const utf32_char ch, utf8_char* buf, 00336 size_t bufsize, bool allowNonchars = false) 00337 { 00338 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 00339 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR)) 00340 return 0; 00341 size_t bufRemaining = bufsize; 00342 int encodedLen = 0; 00343 00344 if (ch < 0x80) 00345 { 00346 OUTPUT_CHAR ((utf8_char)ch); 00347 } 00348 else if (ch < 0x800) 00349 { 00350 OUTPUT_CHAR ((utf8_char)(0xc0 | (ch >> 6))); 00351 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00352 } 00353 else if (ch < 0x10000) 00354 { 00355 OUTPUT_CHAR ((utf8_char)(0xe0 | (ch >> 12))); 00356 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00357 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00358 } 00359 else if (ch < 0x200000) 00360 { 00361 OUTPUT_CHAR ((utf8_char)(0xf0 | (ch >> 18))); 00362 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f))); 00363 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00364 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00365 } 00366 /* 00367 else if (ch < 0x4000000) 00368 { 00369 OUTPUT_CHAR ((utf8_char)(0xf8 | (ch >> 24))); 00370 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f))); 00371 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f))); 00372 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00373 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00374 } 00375 else if (ch < 0x80000000) 00376 { 00377 OUTPUT_CHAR ((utf8_char)(0xfc | (ch >> 30))); 00378 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 24) & 0x3f))); 00379 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f))); 00380 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f))); 00381 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00382 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00383 } 00384 */ 00385 return encodedLen; 00386 } 00387 00403 inline static int EncodeUTF16 (const utf32_char ch, utf16_char* buf, 00404 size_t bufsize, bool allowNonchars = false) 00405 { 00406 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 00407 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR)) 00408 return 0; 00409 size_t bufRemaining = bufsize; 00410 int encodedLen = 0; 00411 00412 if (ch < 0x10000) 00413 { 00414 OUTPUT_CHAR((utf16_char)ch); 00415 } 00416 else if (ch < 0x100000) 00417 { 00418 utf32_char ch_shifted = ch - 0x10000; 00419 OUTPUT_CHAR((utf16_char)((ch_shifted >> 10) 00420 | CS_UC_CHAR_HIGH_SURROGATE_FIRST)); 00421 OUTPUT_CHAR((utf16_char)((ch_shifted & 0x3ff) 00422 | CS_UC_CHAR_LOW_SURROGATE_FIRST)); 00423 } 00424 else 00425 return 0; 00426 00427 return encodedLen; 00428 } 00429 00445 inline static int EncodeUTF32 (const utf32_char ch, utf32_char* buf, 00446 size_t bufsize, bool allowNonchars = false) 00447 { 00448 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 00449 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR)) 00450 return 0; 00451 size_t bufRemaining = bufsize; 00452 int encodedLen = 0; 00453 00454 OUTPUT_CHAR(ch); 00455 00456 return encodedLen; 00457 } 00458 00463 inline static int Encode (const utf32_char ch, utf8_char* buf, 00464 size_t bufsize, bool allowNonchars = false) 00465 { 00466 return EncodeUTF8 (ch, buf, bufsize, allowNonchars); 00467 } 00472 inline static int Encode (const utf32_char ch, utf16_char* buf, 00473 size_t bufsize, bool allowNonchars = false) 00474 { 00475 return EncodeUTF16 (ch, buf, bufsize, allowNonchars); 00476 } 00481 inline static int Encode (const utf32_char ch, utf32_char* buf, 00482 size_t bufsize, bool allowNonchars = false) 00483 { 00484 return EncodeUTF32 (ch, buf, bufsize, allowNonchars); 00485 } 00487 #undef OUTPUT_CHAR 00488 00491 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(dest, chr) 00492 00493 #define UCTF_CONVERTER(funcName, fromType, decoder, toType, encoder) \ 00494 inline static size_t funcName (toType* dest, size_t destSize, \ 00495 const fromType* source, size_t srcSize = (size_t)-1) \ 00496 { \ 00497 if ((srcSize == 0) || (source == 0)) \ 00498 return 0; \ 00499 \ 00500 size_t bufRemaining = (destSize > 0) ? destSize - 1 : 0; \ 00501 size_t encodedLen = 0; \ 00502 \ 00503 size_t srcChars = srcSize; \ 00504 \ 00505 if (srcSize == (size_t)-1) \ 00506 { \ 00507 srcChars = 0; \ 00508 const fromType* sptr = source; \ 00509 while (*sptr++ != 0) srcChars++; \ 00510 } \ 00511 \ 00512 while (srcChars > 0) \ 00513 { \ 00514 utf32_char ch; \ 00515 int scnt = decoder (source, srcChars, ch, 0); \ 00516 if (scnt == 0) break; \ 00517 int dcnt = encoder (ch, dest, bufRemaining); \ 00518 if (dcnt == 0) \ 00519 { \ 00520 dcnt = encoder (CS_UC_CHAR_REPLACER, dest, bufRemaining); \ 00521 } \ 00522 \ 00523 if ((size_t)dcnt >= bufRemaining) \ 00524 { \ 00525 if (dest && (destSize > 0)) dest += bufRemaining; \ 00526 bufRemaining = 0; \ 00527 } \ 00528 else \ 00529 { \ 00530 bufRemaining -= dcnt; \ 00531 if (dest && (destSize > 0)) dest += dcnt; \ 00532 } \ 00533 encodedLen += dcnt; \ 00534 if ((size_t)scnt >= srcChars) break; \ 00535 srcChars -= scnt; \ 00536 source += scnt; \ 00537 } \ 00538 \ 00539 if (dest) *dest = 0; \ 00540 \ 00541 return encodedLen + 1; \ 00542 } 00543 00559 UCTF_CONVERTER (UTF8to16, utf8_char, UTF8Decode, utf16_char, EncodeUTF16); 00564 UCTF_CONVERTER (UTF8to32, utf8_char, UTF8Decode, utf32_char, EncodeUTF32); 00565 00570 UCTF_CONVERTER (UTF16to8, utf16_char, UTF16Decode, utf8_char, EncodeUTF8); 00575 UCTF_CONVERTER (UTF16to32, utf16_char, UTF16Decode, utf32_char, EncodeUTF32); 00576 00581 UCTF_CONVERTER (UTF32to8, utf32_char, UTF32Decode, utf8_char, EncodeUTF8); 00586 UCTF_CONVERTER (UTF32to16, utf32_char, UTF32Decode, utf16_char, EncodeUTF16); 00589 #undef UCTF_CONVERTER 00590 #undef OUTPUT_CHAR 00591 #undef _OUTPUT_CHAR 00592 00593 #if (CS_WCHAR_T_SIZE == 1) 00594 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 00595 const utf8_char* source, size_t srcSize) 00596 { 00597 size_t srcChars = srcSize; 00598 if (srcSize == (size_t)-1) 00599 { 00600 srcChars = 0; 00601 const utf8_char* sptr = source; 00602 while (*sptr++ != 0) srcChars++; 00603 } 00604 if ((dest != 0) && (destSize != 0)) 00605 { 00606 size_t len = csMin (destSize - 1, srcChars); 00607 memcpy (dest, source, size * sizeof (wchar_t)); 00608 *(dest + len) = 0; 00609 } 00610 return srcChars + 1; 00611 }; 00612 00613 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 00614 const utf16_char* source, size_t srcSize) 00615 { 00616 return UTF16to8 ((utf8_char*)dest, destSize, source, srcSize); 00617 }; 00618 00619 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 00620 const utf32_char* source, size_t srcSize) 00621 { 00622 return UTF32to8 ((utf8_char*)dest, destSize, source, srcSize); 00623 }; 00624 00625 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 00626 const wchar_t* source, size_t srcSize) 00627 { 00628 size_t srcChars = srcSize; 00629 if (srcSize == (size_t)-1) 00630 { 00631 srcChars = 0; 00632 const wchar_t* sptr = source; 00633 while (*sptr++ != 0) srcChars++; 00634 } 00635 if ((dest != 0) && (destSize != 0)) 00636 { 00637 size_t len = csMin (destSize - 1, srcChars); 00638 memcpy (dest, source, len * sizeof (wchar_t)); 00639 *(dest + len) = 0; 00640 } 00641 return srcChars + 1; 00642 }; 00643 00644 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 00645 const wchar_t* source, size_t srcSize) 00646 { 00647 return UTF8to16 (dest, destSize, source, srcSize); 00648 }; 00649 00650 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 00651 const wchar_t* source, size_t srcSize) 00652 { 00653 return UTF8to32 (dest, destSize, source, srcSize); 00654 }; 00655 00656 inline static int Decode (const wchar_t* str, size_t strlen, 00657 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00658 { 00659 return UTF8Decode ((utf8_char*)str, strlen, ch, isValid, returnNonChar); 00660 } 00661 inline static int Encode (const utf32_char ch, wchar_t* buf, 00662 size_t bufsize, bool allowNonchars = false) 00663 { 00664 return EncodeUTF8 (ch, (utf8_char*)buf, bufsize, allowNonchars); 00665 } 00666 #elif (CS_WCHAR_T_SIZE == 2) 00667 // Methods below for doxygen documentation are here as the size '2' is 00668 // default. 00669 00676 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 00677 const utf8_char* source, size_t srcSize) 00678 { 00679 return UTF8to16 ((utf16_char*)dest, destSize, source, srcSize); 00680 }; 00681 00686 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 00687 const utf16_char* source, size_t srcSize) 00688 { 00689 size_t srcChars = srcSize; 00690 if (srcSize == (size_t)-1) 00691 { 00692 srcChars = 0; 00693 const utf16_char* sptr = source; 00694 while (*sptr++ != 0) srcChars++; 00695 } 00696 if ((dest != 0) && (destSize != 0)) 00697 { 00698 size_t len = csMin (destSize - 1, srcChars); 00699 memcpy (dest, source, len * sizeof (wchar_t)); 00700 *(dest + len) = 0; 00701 } 00702 return srcChars + 1; 00703 }; 00704 00709 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 00710 const utf32_char* source, size_t srcSize) 00711 { 00712 return UTF32to16 ((utf16_char*)dest, destSize, source, srcSize); 00713 }; 00714 00719 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 00720 const wchar_t* source, size_t srcSize) 00721 { 00722 return UTF16to8 (dest, destSize, (utf16_char*)source, srcSize); 00723 }; 00724 00729 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 00730 const wchar_t* source, size_t srcSize) 00731 { 00732 size_t srcChars = srcSize; 00733 if (srcSize == (size_t)-1) 00734 { 00735 srcChars = 0; 00736 const wchar_t* sptr = source; 00737 while (*sptr++ != 0) srcChars++; 00738 } 00739 if ((dest != 0) && (destSize != 0)) 00740 { 00741 size_t len = csMin (destSize - 1, srcChars); 00742 memcpy (dest, source, len * sizeof (wchar_t)); 00743 *(dest + len) = 0; 00744 } 00745 return srcChars + 1; 00746 }; 00747 00752 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 00753 const wchar_t* source, size_t srcSize) 00754 { 00755 return UTF16to32 (dest, destSize, (utf16_char*)source, srcSize); 00756 }; 00757 00758 /* Decode()/Encode() overloads for wchar_t. 00759 * - On VC7+, wchar_t may be an unsigned short or the special type __wchar_t. 00760 * - On VC6 wchar_t is always an unsigned short. __wchar_t does not exist. 00761 * Now there may be conflicts with the utf16_char overloads if wchar_t is 00762 * an unsigned short. On the other hand, we would like to support VC7+'s 00763 * built-in wchar_t as well. 00764 * So: on VC7+, provide overloads for __wchar_t, on VC6, don't compile this 00765 * code at all, on other compilers, provide overloads for wchar_t instead 00766 * (by re#definining __wchar_t). 00767 */ 00768 #if !defined(CS_COMPILER_MSVC) || (_MSC_VER > 1300) 00769 #if !defined(CS_COMPILER_MSVC) 00770 #define __wchar_t wchar_t 00771 #endif 00772 00776 inline static int Decode (const __wchar_t* str, size_t strlen, 00777 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00778 { 00779 return UTF16Decode ((utf16_char*)str, strlen, ch, isValid, returnNonChar); 00780 } 00785 inline static int Encode (const utf32_char ch, __wchar_t* buf, 00786 size_t bufsize, bool allowNonchars = false) 00787 { 00788 return EncodeUTF16 (ch, (utf16_char*)buf, bufsize, allowNonchars); 00789 } 00790 #ifdef __wchar_t 00791 #undef __wchar_t 00792 #endif 00793 #endif 00794 00795 #elif (CS_WCHAR_T_SIZE == 4) 00796 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 00797 const utf8_char* source, size_t srcSize) 00798 { 00799 return UTF8to32 ((utf32_char*)dest, destSize, source, srcSize); 00800 }; 00801 00802 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 00803 const utf16_char* source, size_t srcSize) 00804 { 00805 return UTF16to32 ((utf32_char*)dest, destSize, source, srcSize); 00806 }; 00807 00808 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 00809 const utf32_char* source, size_t srcSize) 00810 { 00811 size_t srcChars = srcSize; 00812 if (srcSize == (size_t)-1) 00813 { 00814 srcChars = 0; 00815 const utf32_char* sptr = source; 00816 while (*sptr++ != 0) srcChars++; 00817 } 00818 if ((dest != 0) && (destSize != 0)) 00819 { 00820 size_t len = csMin (destSize - 1, srcChars); 00821 memcpy (dest, source, len * sizeof (wchar_t)); 00822 *(dest + len) = 0; 00823 } 00824 return srcChars + 1; 00825 }; 00826 00827 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 00828 const wchar_t* source, size_t srcSize) 00829 { 00830 return UTF32to8 (dest, destSize, (utf32_char*)source, srcSize); 00831 }; 00832 00833 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 00834 const wchar_t* source, size_t srcSize) 00835 { 00836 return UTF32to16 (dest, destSize, (utf32_char*)source, srcSize); 00837 }; 00838 00839 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 00840 const wchar_t* source, size_t srcSize) 00841 { 00842 size_t srcChars = srcSize; 00843 if (srcSize == (size_t)-1) 00844 { 00845 srcChars = 0; 00846 const wchar_t* sptr = source; 00847 while (*sptr++ != 0) srcChars++; 00848 } 00849 if ((dest != 0) && (destSize != 0)) 00850 { 00851 size_t len = csMin (destSize - 1, srcChars); 00852 memcpy (dest, source, len * sizeof (wchar_t)); 00853 *(dest + len) = 0; 00854 } 00855 return srcChars + 1; 00856 }; 00857 00858 inline static int Decode (const wchar_t* str, size_t strlen, 00859 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00860 { 00861 return UTF32Decode ((utf32_char*)str, strlen, ch, isValid, returnNonChar); 00862 } 00863 inline static int Encode (const utf32_char ch, wchar_t* buf, 00864 size_t bufsize, bool allowNonchars = false) 00865 { 00866 return EncodeUTF32 (ch, (utf32_char*)buf, bufsize, allowNonchars); 00867 } 00868 #else 00869 #error Odd-sized, unsupported wchar_t! 00870 #endif 00871 00884 inline static int UTF8Skip (const utf8_char* str, size_t maxSkip) 00885 { 00886 if (maxSkip < 1) return 0; 00887 00888 if ((*str & 0x80) == 0) 00889 { 00890 return 1; 00891 } 00892 else 00893 { 00894 int n = 0; 00895 while ((n < 7) && ((*str & (1 << (7 - n))) != 0)) { n++; } 00896 00897 if ((n < 2) || (n > 6)) 00898 { 00899 return 1; 00900 } 00901 00902 int skip = 1; 00903 00904 for (; skip < n; skip++) 00905 { 00906 if (((str[skip] & 0xc0) != 0x80) || ((size_t)skip > maxSkip)) 00907 { 00908 break; 00909 } 00910 } 00911 return skip; 00912 } 00913 } 00914 00925 inline static int UTF8Rewind (const utf8_char* str, size_t maxRew) 00926 { 00927 if (maxRew < 1) return 0; 00928 00929 const utf8_char* pos = str - 1; 00930 00931 if ((*pos & 0x80) == 0) 00932 { 00933 return 1; 00934 } 00935 00936 // Skip backward to the first byte of the sequence. 00937 int skip = 1; 00938 while (((*pos & 0xc0) == 0x80) && ((size_t)skip < maxRew)) 00939 { 00940 skip++; 00941 pos--; 00942 } 00943 00944 return skip; 00945 } 00946 00952 inline static int UTF16Skip (const utf16_char* str, size_t maxSkip) 00953 { 00954 if (CS_UC_IS_HIGH_SURROGATE (*str)) 00955 return (int)(csMin (maxSkip, (size_t)2)); 00956 else 00957 return (int)(csMin (maxSkip, (size_t)1)); 00958 } 00959 00965 inline static int UTF16Rewind (const utf16_char* str, size_t maxRew) 00966 { 00967 if (maxRew < 1) return 0; 00968 00969 const utf16_char* pos = str - 1; 00970 if (!CS_UC_IS_SURROGATE(*pos)) 00971 return 1; 00972 else 00973 { 00974 if ((maxRew > 1) && (CS_UC_IS_HIGH_SURROGATE(*(pos - 1)))) 00975 return 2; 00976 else 00977 return 1; 00978 } 00979 } 00980 00986 inline static int UTF32Skip (const utf32_char* str, size_t maxSkip) 00987 { 00988 (void)str; // silence gcc 00989 return (int)(csMin (maxSkip, (size_t)1)); 00990 } 00991 00997 inline static int UTF32Rewind (const utf32_char* str, size_t maxRew) 00998 { 00999 (void)str; // silence gcc 01000 if (maxRew < 1) return 0; 01001 return 1; 01002 } 01017 static size_t MapToUpper (const utf32_char ch, utf32_char* dest, 01018 size_t destSize, uint flags = 0); 01025 inline static utf32_char MapToUpper (const utf32_char ch) 01026 { 01027 utf32_char ret; 01028 MapToUpper (ch, &ret, 1, csUcMapSimple); 01029 return ret; 01030 } 01035 static size_t MapToLower (const utf32_char ch, utf32_char* dest, 01036 size_t destSize, uint flags = 0); 01037 inline static utf32_char MapToLower (const utf32_char ch) 01038 { 01039 utf32_char ret; 01040 MapToLower (ch, &ret, 1, csUcMapSimple); 01041 return ret; 01042 } 01048 static size_t MapToFold (const utf32_char ch, utf32_char* dest, 01049 size_t destSize, uint flags = 0); 01050 inline static utf32_char MapToFold (const utf32_char ch) 01051 { 01052 utf32_char ret; 01053 MapToFold (ch, &ret, 1, csUcMapSimple); 01054 return ret; 01055 } 01057 }; 01058 01061 #endif 01062
Generated for Crystal Space 2.1 by doxygen 1.6.1
