CrystalSpace

Public API Reference

csutil/csuctransform.h

Go to the documentation of this file.
00001 /*
00002     Copyright (C) 2003 by Frank Richter
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public
00015     License along with this library; if not, write to the Free
00016     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00017 */
00018 
00019 #ifndef __CS_CSUCTRANSFORM_H__
00020 #define __CS_CSUCTRANSFORM_H__
00021 
00022 #include "csunicode.h"
00023 
00035 #define CS_UC_MAX_UTF8_ENCODED          4  /* 6 to encode 32 bit */
00036 
00040 #define CS_UC_MAX_UTF16_ENCODED         2
00041 
00045 #define CS_UC_MAX_UTF32_ENCODED         1
00046 #if (CS_WCHAR_T_SIZE == 1)
00047 #define CS_UC_MAX_WCHAR_T_ENCODED       CS_UC_MAX_UTF8_ENCODED
00048 #elif (CS_WCHAR_T_SIZE == 2)
00049 
00053 #define CS_UC_MAX_WCHAR_T_ENCODED       CS_UC_MAX_UTF16_ENCODED
00054 #else
00055 #define CS_UC_MAX_WCHAR_T_ENCODED       CS_UC_MAX_UTF32_ENCODED
00056 #endif
00057 
00061 #define CS_UC_MAX_MAPPED                3
00062 
00066 enum
00067 {
00073   csUcMapSimple = (1 << 0)
00074 };
00075 
00079 class CS_CRYSTALSPACE_EXPORT csUnicodeTransform
00080 {
00081 public:
00082 #define FAIL(ret)                               \
        /* Failure exit for the *Decode methods: clears *isValid when a flag */ \
        /* was supplied, stores CS_UC_CHAR_REPLACER in 'ch' and makes the    */ \
        /* enclosing function return 'ret'.                                  */ \
00083   {                                             \
00084     if (isValid) *isValid = false;              \
00085     ch = CS_UC_CHAR_REPLACER;                   \
00086     return ret;                                 \
00087   }
00088 
00089 #define SUCCEED                                 \
          /* Success exit for the *Decode methods: sets *isValid when a flag */ \
          /* was supplied and returns the number of consumed code units.     */ \
          /* Expands to two statements, so it must not be used as the        */ \
          /* unbraced body of an if/else.                                    */ \
00090     if (isValid) *isValid = true;               \
00091     return chUsed;
00092   
00093 #define GET_NEXT(next)  \
        /* Fetch the next code unit from 'str' into 'next'.                  */ \
        /* Leaves through FAIL(chUsed) when 'strlen' units were already      */ \
        /* consumed, or when the fetched unit is 0 (the 0 unit is not        */ \
        /* counted in chUsed). Otherwise chUsed is incremented.              */ \
00094   if ((size_t)chUsed == strlen)                 \
00095   {                                             \
00096     FAIL(chUsed);                               \
00097   }                                             \
00098   next = *str++;                                \
00099   if (next == 0)                                \
00100   {                                             \
00101     FAIL(chUsed);                               \
00102   }                                             \
00103   chUsed++;                                     
00104   
00123   inline static int UTF8Decode (const utf8_char* str, size_t strlen, 
00124     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00125   {
          // Decode one code point from the UTF-8 buffer 'str', reading at most
          // 'strlen' code units.
          //  - ch:            receives the decoded code point, or
          //                   CS_UC_CHAR_REPLACER when decoding fails.
          //  - isValid:       optional; set to true on success, false on failure.
          //  - returnNonChar: when false, code points matched by
          //                   CS_UC_IS_NONCHARACTER or CS_UC_IS_SURROGATE are
          //                   reported as failures.
          // Returns the number of code units consumed; 0 when 'str' is null or
          // when the first unit cannot be fetched (empty buffer or 0 unit).
          // All exits go through the FAIL / SUCCEED macros, which contain
          // 'return' statements.
00126     if (str == 0)
00127     {
00128       FAIL(0);
00129     }
00130     int chUsed = 0;
00131     
00132     utf8_char curCh;
00133     GET_NEXT(curCh);
00134     if ((curCh & 0x80) == 0)
00135     {
00136       // easy case
00137       ch = curCh;
00138       SUCCEED;
00139     }
00140     else
00141     {
00142       // Count with how many bytes this char is encoded.
            // (n = number of consecutive set bits starting at the MSB.)
00143       int n = 0;
00144       while ((n < 7) && ((curCh & (1 << (7 - n))) != 0)) { n++; }
00145 
00146       if ((n < 2) || (n > 6))
00147       {
00148         // Invalid code: first char of a "sequence" must have
00149         // at least two and at most six MSBs set
00150         FAIL(1);
00151       }
00152 
            // Keep only the low (8 - n) bits of the lead byte as payload.
00153       ch = (curCh & ((1 << (8 - n)) - 1));
00154       
            // Each of the n - 1 following bytes must look like 10xxxxxx and
            // contributes six payload bits. A byte that does not match has
            // already been consumed by GET_NEXT when FAIL is reached.
00155       for (int i = 1; i < n; i++)
00156       {
00157         GET_NEXT(curCh);
00158         if ((curCh & 0xc0) != 0x80)
00159         {
00160           FAIL(chUsed);
00161         }
00162         else
00163         {
00164           ch <<= 6;
00165           ch |= (curCh & 0x3f);
00166         }
00167       }
00168       
00169       // Check if in Unicode range.
00170       if (ch > CS_UC_LAST_CHAR)
00171       {
00172         FAIL(chUsed);
00173       }
00174 
00175       // Check for "overlong" codes.
            // n is in [2, 6] at this point, so the (n > 0) test below always
            // holds: every multi-byte encoding of a value < 0x80 is rejected.
00176       if ((ch < 0x80) && (n > 0))
00177       {
00178         FAIL(chUsed);
00179       }
00180       else if ((ch < 0x800) && (n > 2))
00181       {
00182         FAIL(chUsed);
00183       }
00184       else if ((ch < 0x10000) && (n > 3))
00185       {
00186         FAIL(chUsed);
00187       }
00188       else if ((ch < 0x200000) && (n > 4))
00189       {
00190         FAIL(chUsed);
00191       }
00192       /* 
00193       else if ((ch < 0x4000000) && (n > 5))
00194       {
00195         FAIL(chUsed);
00196       }
00197       else if ((ch < 0x80000000) && (n > 6))
00198       {
00199         FAIL(chUsed);
00200       }
00201       */
00202       
00203       if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00204         || CS_UC_IS_SURROGATE(ch)))
00205         FAIL(chUsed);
00206       SUCCEED;
00207     }
00208   }
00209   
00214   inline static int UTF16Decode (const utf16_char* str, size_t strlen, 
00215     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00216   {
          // Decode one code point from the UTF-16 buffer 'str', reading at
          // most 'strlen' code units.
          //  - ch:            receives the decoded code point, or
          //                   CS_UC_CHAR_REPLACER when decoding fails.
          //  - isValid:       optional; set to true on success, false on failure.
          //  - returnNonChar: when false, code points matched by
          //                   CS_UC_IS_NONCHARACTER or CS_UC_IS_SURROGATE are
          //                   reported as failures.
          // Returns the number of code units consumed; 0 when 'str' is null or
          // when the first unit cannot be fetched (empty buffer or 0 unit).
00217     if (str == 0)
00218     {
00219       FAIL(0);
00220     }
00221     int chUsed = 0;
00222     
00223     utf16_char curCh;
00224     GET_NEXT(curCh);
00225     // Decode surrogate
00226     if (CS_UC_IS_SURROGATE (curCh))
00227     {
00228       // Invalid code
            // (a surrogate that is not a high surrogate cannot start a pair)
00229       if (!CS_UC_IS_HIGH_SURROGATE (curCh))
00230       {
00231         FAIL(chUsed);
00232       }
            // The high surrogate supplies the upper 10 payload bits, offset by
            // 0x10000; the low surrogate fills in the lower 10 bits below.
00233       ch = 0x10000 + ((curCh & 0x03ff) << 10);
00234       GET_NEXT(curCh);
00235       // Invalid code
00236       if (!CS_UC_IS_LOW_SURROGATE (curCh))
00237       {
00238         // Fail with 1 so the char is handled upon the next Decode.
00239         FAIL(1);
00240       }
00241       ch |= (curCh & 0x3ff);
00242     }
00243     else
00244     {
00245       ch = curCh;
00246     }
00247     if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00248       || CS_UC_IS_SURROGATE(ch)))
00249       FAIL(chUsed);
00250     SUCCEED;
00251   }
00252   
00257   inline static int UTF32Decode (const utf32_char* str, size_t strlen, 
00258     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00259   {
          // "Decode" one code point from the UTF-32 buffer 'str': the unit is
          // copied into 'ch' and then validated.
          // Fails (ch = CS_UC_CHAR_REPLACER, *isValid = false) when the value
          // exceeds CS_UC_LAST_CHAR, or when returnNonChar is false and the
          // value matches CS_UC_IS_NONCHARACTER or CS_UC_IS_SURROGATE.
          // Returns the number of units consumed: 1, or 0 when 'str' is null,
          // 'strlen' is 0 or the unit is 0.
00260     if (str == 0)
00261     {
00262       FAIL(0);
00263     }
00264     int chUsed = 0;
00265     
00266     GET_NEXT(ch);
00267     if ((!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00268       || CS_UC_IS_SURROGATE(ch))) || (ch > CS_UC_LAST_CHAR))
00269       FAIL(chUsed);
00270     SUCCEED;
00271   }
00272 
00277   inline static int Decode (const utf8_char* str, size_t strlen, 
00278     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00279   {
00280     return UTF8Decode (str, strlen, ch, isValid, returnNonChar);
00281   }
00286   inline static int Decode (const utf16_char* str, size_t strlen, 
00287     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00288   {
00289     return UTF16Decode (str, strlen, ch, isValid, returnNonChar);
00290   }
00295   inline static int Decode (const utf32_char* str, size_t strlen, 
00296     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00297   {
00298     return UTF32Decode (str, strlen, ch, isValid, returnNonChar);
00299   }
00300 
00302 #undef FAIL
00303 #undef SUCCEED
00304 #undef GET_NEXT
00305 
00308 #define _OUTPUT_CHAR(buf, chr)                          \
        /* Emit one code unit: stored through 'buf' only while space is     */ \
        /* left (bufRemaining > 0) and 'buf' is non-null. encodedLen is     */ \
        /* incremented in every case, so it counts the units required, not  */ \
        /* the units actually written.                                      */ \
00309   if (bufRemaining > 0)                                 \
00310   {                                                     \
00311     if(buf) *buf++ = chr;                               \
00312     bufRemaining--;                                     \
00313   }                                                     \
00314   encodedLen++;
00315 
00316 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(buf, chr)
00317   
00333   inline static int EncodeUTF8 (const utf32_char ch, utf8_char* buf, 
00334     size_t bufsize, bool allowNonchars = false)
00335   {
          // Encode the code point 'ch' as UTF-8.
          //  - buf / bufsize: destination and its capacity in code units; at
          //                   most 'bufsize' units are stored and nothing is
          //                   stored when 'buf' is null. No terminator is added.
          //  - allowNonchars: when false, code points matched by
          //                   CS_UC_IS_NONCHARACTER or CS_UC_IS_SURROGATE are
          //                   rejected.
          // Returns the number of units the encoding needs (1 to 4), which can
          // exceed 'bufsize'; returns 0 when 'ch' is rejected or is greater
          // than CS_UC_LAST_CHAR.
00336     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00337       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00338       return 0;
00339     size_t bufRemaining = bufsize;
00340     int encodedLen = 0;
00341     
00342     if (ch < 0x80)
00343     {
00344       OUTPUT_CHAR ((utf8_char)ch);
00345     }
00346     else if (ch < 0x800)
00347     {
00348       OUTPUT_CHAR ((utf8_char)(0xc0 | (ch >> 6)));
00349       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00350     }
00351     else if (ch < 0x10000)
00352     {
00353       OUTPUT_CHAR ((utf8_char)(0xe0 | (ch >> 12)));
00354       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00355       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00356     }
00357     else if (ch < 0x200000)
00358     {
00359       OUTPUT_CHAR ((utf8_char)(0xf0 | (ch >> 18)));
00360       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00361       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00362       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00363     }
          // The 5- and 6-unit forms below are disabled.
00364     /*
00365     else if (ch < 0x4000000)
00366     {
00367       OUTPUT_CHAR ((utf8_char)(0xf8 | (ch >> 24)));
00368       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f)));
00369       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00370       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00371       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00372     }
00373     else if (ch < 0x80000000)
00374     {
00375       OUTPUT_CHAR ((utf8_char)(0xfc | (ch >> 30)));
00376       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 24) & 0x3f)));
00377       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f)));
00378       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00379       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00380       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00381     }
00382     */
00383     return encodedLen;
00384   }
00385     
00401   inline static int EncodeUTF16 (const utf32_char ch, utf16_char* buf, 
00402     size_t bufsize, bool allowNonchars = false)
00403   {
          // Encode the code point 'ch' as UTF-16.
          //  - buf / bufsize: destination and its capacity in code units; at
          //                   most 'bufsize' units are stored and nothing is
          //                   stored when 'buf' is null. No terminator is added.
          //  - allowNonchars: when false, code points matched by
          //                   CS_UC_IS_NONCHARACTER or CS_UC_IS_SURROGATE are
          //                   rejected.
          // Returns the number of units the encoding needs (1, or 2 for a
          // surrogate pair), which can exceed 'bufsize'; returns 0 when 'ch'
          // is rejected, is greater than CS_UC_LAST_CHAR or is not
          // representable in UTF-16.
00404     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00405       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00406       return 0;
00407     size_t bufRemaining = bufsize;
00408     int encodedLen = 0;
00409     
00410     if (ch < 0x10000)
00411     {
00412       OUTPUT_CHAR((utf16_char)ch);
00413     }
          // Surrogate pairs cover U+10000..U+10FFFF. After subtracting 0x10000
          // the value fits in 20 bits, 10 per surrogate. The previous bound of
          // 0x100000 wrongly rejected all of plane 16 (U+100000..U+10FFFF).
00414     else if (ch < 0x110000)
00415     {
00416       utf32_char ch_shifted = ch - 0x10000;
00417       OUTPUT_CHAR((utf16_char)((ch_shifted >> 10) 
00418         | CS_UC_CHAR_HIGH_SURROGATE_FIRST));
00419       OUTPUT_CHAR((utf16_char)((ch_shifted & 0x3ff) 
00420         | CS_UC_CHAR_LOW_SURROGATE_FIRST));
00421     }
00422     else
00423       return 0;
00424     
00425     return encodedLen;
00426   }
00427 
00443   inline static int EncodeUTF32 (const utf32_char ch, utf32_char* buf, 
00444     size_t bufsize, bool allowNonchars = false)
00445   {
          // Encode the code point 'ch' as UTF-32, i.e. validate it and copy it.
          // The unit is stored only when 'buf' is non-null and 'bufsize' > 0;
          // no terminator is added.
          // Returns 1, or 0 when 'ch' is greater than CS_UC_LAST_CHAR or when
          // allowNonchars is false and 'ch' matches CS_UC_IS_NONCHARACTER or
          // CS_UC_IS_SURROGATE.
00446     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00447       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00448       return 0;
00449     size_t bufRemaining = bufsize;
00450     int encodedLen = 0;
00451     
00452     OUTPUT_CHAR(ch);
00453     
00454     return encodedLen;
00455   }
00456 
00461   inline static int Encode (const utf32_char ch, utf8_char* buf, 
00462     size_t bufsize, bool allowNonchars = false)
00463   {
00464     return EncodeUTF8 (ch, buf, bufsize, allowNonchars);
00465   }
00470   inline static int Encode (const utf32_char ch, utf16_char* buf, 
00471     size_t bufsize, bool allowNonchars = false)
00472   {
00473     return EncodeUTF16 (ch, buf, bufsize, allowNonchars);
00474   }
00479   inline static int Encode (const utf32_char ch, utf32_char* buf, 
00480     size_t bufsize, bool allowNonchars = false)
00481   {
00482     return EncodeUTF32 (ch, buf, bufsize, allowNonchars);
00483   }
00485 #undef OUTPUT_CHAR
00486   
00489 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(dest, chr)
00490   
00491 #define UCTF_CONVERTER(funcName, fromType, decoder, toType, encoder)    \
        /* Generates a string converter 'funcName' from 'fromType' units to */  \
        /* 'toType' units, built on the given decoder/encoder pair.         */  \
        /*  - dest / destSize: output buffer and its capacity in units; one */  \
        /*    unit is reserved for the 0 terminator. 'dest' may be null.    */  \
        /*  - source / srcSize: input and its length in units; with         */  \
        /*    srcSize == (size_t)-1 the input is scanned up to its 0 unit.  */  \
        /* Returns the number of units the complete conversion needs,       */  \
        /* terminator included, even when 'dest' is too small; returns 0    */  \
        /* (leaving 'dest' untouched) when 'source' is null or srcSize is 0.*/  \
        /* Code points the encoder rejects are replaced by                  */  \
        /* CS_UC_CHAR_REPLACER.                                             */  \
00492   inline static size_t funcName (toType* dest, size_t destSize,         \
00493     const fromType* source, size_t srcSize = (size_t)-1)                \
00494   {                                                                     \
00495     if ((srcSize == 0) || (source == 0))                                \
00496       return 0;                                                         \
00497                                                                         \
00498     size_t bufRemaining = (destSize > 0) ? destSize - 1 : 0;            \
00499     size_t encodedLen = 0;                                              \
00500                                                                         \
00501     size_t srcChars = srcSize;                                          \
00502                                                                         \
00503     if (srcSize == (size_t)-1)                                          \
00504     {                                                                   \
00505       srcChars = 0;                                                     \
00506       const fromType* sptr = source;                                    \
00507       while (*sptr++ != 0) srcChars++;                                  \
00508     }                                                                   \
00509                                                                         \
00510     while (srcChars > 0)                                                \
00511     {                                                                   \
00512       utf32_char ch;                                                    \
00513       int scnt = decoder (source, srcChars, ch, 0);                     \
00514       if (scnt == 0) break;                                             \
00515       int dcnt = encoder (ch, dest, bufRemaining);                      \
00516       if (dcnt == 0)                                                    \
00517       {                                                                 \
00518         dcnt = encoder (CS_UC_CHAR_REPLACER, dest, bufRemaining);       \
00519       }                                                                 \
00520                                                                         \
            /* The encoder stored at most bufRemaining units; advance 'dest' */ \
            /* by what was stored but keep counting the full length.         */ \
00521       if ((size_t)dcnt >= bufRemaining)                                 \
00522       {                                                                 \
00523         if (dest && (destSize > 0)) dest += bufRemaining;               \
00524         bufRemaining = 0;                                               \
00525       }                                                                 \
00526       else                                                              \
00527       {                                                                 \
00528         bufRemaining -= dcnt;                                           \
00529         if (dest && (destSize > 0)) dest += dcnt;                       \
00530       }                                                                 \
00531       encodedLen += dcnt;                                               \
00532       if ((size_t)scnt >= srcChars) break;                              \
00533       srcChars -= scnt;                                                 \
00534       source += scnt;                                                   \
00535     }                                                                   \
00536                                                                         \
          /* Terminate only when the buffer really has room: a buffer with   */ \
          /* destSize == 0 has no unit reserved for the terminator, so       */ \
          /* writing it would overrun the buffer.                            */ \
00537     if (dest && (destSize > 0)) *dest = 0;                              \
00538                                                                         \
00539     return encodedLen + 1;                                              \
00540   }
00541 
00557   UCTF_CONVERTER (UTF8to16, utf8_char, UTF8Decode, utf16_char, EncodeUTF16);
00562   UCTF_CONVERTER (UTF8to32, utf8_char, UTF8Decode, utf32_char, EncodeUTF32);
00563 
00568   UCTF_CONVERTER (UTF16to8, utf16_char, UTF16Decode, utf8_char, EncodeUTF8);
00573   UCTF_CONVERTER (UTF16to32, utf16_char, UTF16Decode, utf32_char, EncodeUTF32);
00574   
00579   UCTF_CONVERTER (UTF32to8, utf32_char, UTF32Decode, utf8_char, EncodeUTF8);
00584   UCTF_CONVERTER (UTF32to16, utf32_char, UTF32Decode, utf16_char, EncodeUTF16);
00587 #undef UCTF_CONVERTER
00588 #undef OUTPUT_CHAR
00589 #undef _OUTPUT_CHAR
00590 
00591 #if (CS_WCHAR_T_SIZE == 1)
00592   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00593     const utf8_char* source, size_t srcSize)
00594   {
          // Variant for a 1-byte wchar_t: wide characters are treated as UTF-8
          // units, so the conversion is a plain bounded copy.
          //  - dest / destSize: output buffer and its capacity in units; may
          //                     be null / 0, in which case nothing is written.
          //  - source / srcSize: input and its length in units; with
          //                     srcSize == (size_t)-1 the input is scanned up
          //                     to its 0 unit.
          // Returns the source length plus one for the terminator, regardless
          // of how much fitted into 'dest'.
00595     size_t srcChars = srcSize;                                          
00596     if (srcSize == (size_t)-1)                                          
00597     {                                                                   
00598       srcChars = 0;                                                     
00599       const utf8_char* sptr = source;                                   
00600       while (*sptr++ != 0) srcChars++;                                  
00601     }                           
00602     if ((dest != 0) && (destSize != 0))
00603     {
00604       size_t len = MIN (destSize - 1, srcChars);
            // Copy exactly the clamped unit count; the former 'size' was an
            // undeclared identifier.
00605       memcpy (dest, source, len * sizeof (wchar_t));
00606       *(dest + len) = 0;
00607     }
00608     return srcChars + 1;
00609   };
00610 
00611   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00612     const utf16_char* source, size_t srcSize)
00613   {
00614     return UTF16to8 ((utf8_char*)dest, destSize, source, srcSize);
00615   };
00616 
00617   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00618     const utf32_char* source, size_t srcSize)
00619   {
00620     return UTF32to8 ((utf8_char*)dest, destSize, source, srcSize);
00621   };
00622   
00623   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00624     const wchar_t* source, size_t srcSize)
00625   {
00626     size_t srcChars = srcSize;                                          
00627     if (srcSize == (size_t)-1)                                          
00628     {                                                                   
00629       srcChars = 0;                                                     
00630       const wchar_t* sptr = source;                                     
00631       while (*sptr++ != 0) srcChars++;                                  
00632     }                           
00633     if ((dest != 0) && (destSize != 0))
00634     {
00635       size_t len = MIN (destSize - 1, srcChars);
00636       memcpy (dest, source, len * sizeof (wchar_t));
00637       *(dest + len) = 0;
00638     }
00639     return srcChars + 1;
00640   };
00641 
00642   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00643     const wchar_t* source, size_t srcSize)
00644   {
          // Variant for a 1-byte wchar_t: the wide string is treated as UTF-8
          // and handed to UTF8to16, whose result is returned unchanged.
          // The cast is needed because 'const wchar_t*' is not guaranteed to
          // convert implicitly to 'const utf8_char*'; the sibling wrappers for
          // this wchar_t size cast in the same way.
00645     return UTF8to16 (dest, destSize, (const utf8_char*)source, srcSize);
00646   };
00647 
00648   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00649     const wchar_t* source, size_t srcSize)
00650   {
          // Variant for a 1-byte wchar_t: the wide string is treated as UTF-8
          // and handed to UTF8to32, whose result is returned unchanged.
          // The cast is needed because 'const wchar_t*' is not guaranteed to
          // convert implicitly to 'const utf8_char*'; the sibling wrappers for
          // this wchar_t size cast in the same way.
00651     return UTF8to32 (dest, destSize, (const utf8_char*)source, srcSize);
00652   };
00653 
00654   inline static int Decode (const wchar_t* str, size_t strlen, 
00655     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00656   {
00657     return UTF8Decode ((utf8_char*)str, strlen, ch, isValid, returnNonChar);
00658   }
00659   inline static int Encode (const utf32_char ch, wchar_t* buf, 
00660     size_t bufsize, bool allowNonchars = false)
00661   {
00662     return EncodeUTF8 (ch, (utf8_char*)buf, bufsize, allowNonchars);
00663   }
00664 #elif (CS_WCHAR_T_SIZE == 2)
00665   // Methods below for doxygen documentation are here as the size '2' is 
00666   // default.
00667   
00674   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00675     const utf8_char* source, size_t srcSize)
00676   {
00677     return UTF8to16 ((utf16_char*)dest, destSize, source, srcSize);
00678   };
00679 
00684   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00685     const utf16_char* source, size_t srcSize)
00686   {
00687     size_t srcChars = srcSize;                                          
00688     if (srcSize == (size_t)-1)                                          
00689     {                                                                   
00690       srcChars = 0;                                                     
00691       const utf16_char* sptr = source;                                  
00692       while (*sptr++ != 0) srcChars++;                                  
00693     }                           
00694     if ((dest != 0) && (destSize != 0))
00695     {
00696       size_t len = MIN (destSize - 1, srcChars);
00697       memcpy (dest, source, len * sizeof (wchar_t));
00698       *(dest + len) = 0;
00699     }
00700     return srcChars + 1;
00701   };
00702 
00707   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00708     const utf32_char* source, size_t srcSize)
00709   {
00710     return UTF32to16 ((utf16_char*)dest, destSize, source, srcSize);
00711   };
00712   
00717   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00718     const wchar_t* source, size_t srcSize)
00719   {
00720     return UTF16to8 (dest, destSize, (utf16_char*)source, srcSize);
00721   };
00722 
00727   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00728     const wchar_t* source, size_t srcSize)
00729   {
00730     size_t srcChars = srcSize;                                          
00731     if (srcSize == (size_t)-1)                                          
00732     {                                                                   
00733       srcChars = 0;                                                     
00734       const wchar_t* sptr = source;                                     
00735       while (*sptr++ != 0) srcChars++;                                  
00736     }                           
00737     if ((dest != 0) && (destSize != 0))
00738     {
00739       size_t len = MIN (destSize - 1, srcChars);
00740       memcpy (dest, source, len * sizeof (wchar_t));
00741       *(dest + len) = 0;
00742     }
00743     return srcChars + 1;
00744   };
00745 
00750   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00751     const wchar_t* source, size_t srcSize)
00752   {
00753     return UTF16to32 (dest, destSize, (utf16_char*)source, srcSize);
00754   };
00755 
00756   /* Decode()/Encode() overloads for wchar_t.
00757    * - On VC7+, wchar_t may be an unsigned short or the special type __wchar_t.
00758    * - On VC6 wchar_t is always an unsigned short. __wchar_t does not exist.
00759    * Now there may be conflicts with the utf16_char overloads if wchar_t is
00760    * an unsigned short. On the other hand, we would like to support VC7+'s
00761    * built-in wchar_t as well.
00762    * So: on VC7+, provide overloads for __wchar_t, on VC6, don't compile this
00763    * code at all, on other compilers, provide overloads for wchar_t instead
00764    * (by re#definining __wchar_t). 
00765    */
00766 #if !defined(CS_COMPILER_MSVC) || (_MSC_VER > 1300)
00767 #if !defined(CS_COMPILER_MSVC)
00768   #define __wchar_t wchar_t
00769 #endif  
00770 
00774   inline static int Decode (const __wchar_t* str, size_t strlen, 
00775     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00776   {
00777     return UTF16Decode ((utf16_char*)str, strlen, ch, isValid, returnNonChar);
00778   }
00783   inline static int Encode (const utf32_char ch, __wchar_t* buf, 
00784     size_t bufsize, bool allowNonchars = false)
00785   {
00786     return EncodeUTF16 (ch, (utf16_char*)buf, bufsize, allowNonchars);
00787   }
00788 #ifdef __wchar_t
00789   #undef __wchar_t
00790 #endif
00791 #endif
00792 
00793 #elif (CS_WCHAR_T_SIZE == 4)
00794   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00795     const utf8_char* source, size_t srcSize)
00796   {
00797     return UTF8to32 ((utf32_char*)dest, destSize, source, srcSize);
00798   };
00799 
00800   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00801     const utf16_char* source, size_t srcSize)
00802   {
00803     return UTF16to32 ((utf32_char*)dest, destSize, source, srcSize);
00804   };
00805 
00806   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00807     const utf32_char* source,  size_t srcSize)
00808   {
00809     size_t srcChars = srcSize;                                          
00810     if (srcSize == (size_t)-1)                                          
00811     {                                                                   
00812       srcChars = 0;                                                     
00813       const utf32_char* sptr = source;                                  
00814       while (*sptr++ != 0) srcChars++;                                  
00815     }                           
00816     if ((dest != 0) && (destSize != 0))
00817     {
00818       size_t len = MIN (destSize - 1, srcChars);
00819       memcpy (dest, source, len * sizeof (wchar_t));
00820       *(dest + len) = 0;
00821     }
00822     return srcChars + 1;
00823   };
00824   
00825   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00826     const wchar_t* source, size_t srcSize)
00827   {
00828     return UTF32to8 (dest, destSize, (utf32_char*)source, srcSize);
00829   };
00830 
00831   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00832     const wchar_t* source, size_t srcSize)
00833   {
00834     return UTF32to16 (dest, destSize, (utf32_char*)source, srcSize);
00835   };
00836 
00837   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00838     const wchar_t* source, size_t srcSize)
00839   {
00840     size_t srcChars = srcSize;                                          
00841     if (srcSize == (size_t)-1)                                          
00842     {                                                                   
00843       srcChars = 0;                                                     
00844       const wchar_t* sptr = source;                                     
00845       while (*sptr++ != 0) srcChars++;                                  
00846     }                           
00847     if ((dest != 0) && (destSize != 0))
00848     {
00849       size_t len = MIN (destSize - 1, srcChars);
00850       memcpy (dest, source, len * sizeof (wchar_t));
00851       *(dest + len) = 0;
00852     }
00853     return srcChars + 1;
00854   };
00855 
00856   inline static int Decode (const wchar_t* str, size_t strlen, 
00857     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00858   {
00859     return UTF32Decode ((utf32_char*)str, strlen, ch, isValid, returnNonChar);
00860   }
00861   inline static int Encode (const utf32_char ch, wchar_t* buf, 
00862     size_t bufsize, bool allowNonchars = false)
00863   {
00864     return EncodeUTF32 (ch, (utf32_char*)buf, bufsize, allowNonchars);
00865   }
00866 #else
00867   #error Odd-sized, unsupported wchar_t!
00868 #endif
00869 
00882   inline static int UTF8Skip (const utf8_char* str, size_t maxSkip)
00883   {
          // Returns how many UTF-8 code units have to be stepped over to get
          // from 'str' to the start of the next encoded character.
          //  - str:     position of the current character; read whenever
          //             maxSkip >= 1.
          //  - maxSkip: number of units available at 'str'; the result never
          //             exceeds it and no unit at or beyond str[maxSkip] is
          //             read.
          // A lead byte that is malformed (fewer than two or more than six
          // leading 1 bits) is skipped as a single unit. A sequence cut short
          // by a non-continuation byte ends right before that byte.
00884     if (maxSkip < 1) return 0;
00885   
00886     if ((*str & 0x80) == 0)
00887     {
00888       return 1;
00889     }
00890     else
00891     {
            // n = sequence length announced by the lead byte (count of
            // consecutive set bits starting at the MSB).
00892       int n = 0;
00893       while ((n < 7) && ((*str & (1 << (7 - n))) != 0)) { n++; }
00894 
00895       if ((n < 2) || (n > 6))
00896       {
00897         return 1;
00898       }
00899 
00900       int skip = 1;
00901       
00902       for (; skip < n; skip++)
00903       {
              // Test the limit before touching str[skip]. The previous order,
              // together with '>' instead of '>=', read one unit past the
              // permitted range and could return more than maxSkip.
00904         if (((size_t)skip >= maxSkip) || ((str[skip] & 0xc0) != 0x80))
00905         {
00906           break;
00907         }
00908       }
00909       return skip;
00910     }
00911   }
00912   
00923   inline static int UTF8Rewind (const utf8_char* str, size_t maxRew)
00924   {
          // Returns how many UTF-8 code units lie between 'str' and the start
          // of the encoded character in front of it.
          //  - str:    position just after the character to rewind over; the
          //            units before it (str[-1], str[-2], ...) are read, so
          //            at least 'maxRew' units must precede 'str'.
          //  - maxRew: upper limit for the result; 0 yields 0.
          // The lead byte reached is not checked against the number of
          // continuation bytes that were stepped over.
00925     if (maxRew < 1) return 0;
00926     
00927     const utf8_char* pos = str - 1;
00928     
00929     if ((*pos & 0x80) == 0)
00930     {
00931       return 1;
00932     }
00933     
00934     // Skip backward to the first byte of the sequence.
          // (Continuation bytes have the form 10xxxxxx.)
00935     int skip = 1;
00936     while (((*pos & 0xc0) == 0x80) && ((size_t)skip < maxRew))
00937     {
00938       skip++;
00939       pos--;
00940     }
00941     
00942     return skip;
00943   }
00944   
00950   inline static int UTF16Skip (const utf16_char* str, size_t maxSkip)
00951   {
          // Returns how many UTF-16 code units have to be stepped over to get
          // from 'str' to the next encoded character: 2 when *str is a high
          // surrogate, 1 otherwise, clamped to 'maxSkip' in both cases.
          // *str is read even when maxSkip is 0.
          // NOTE(review): the unit following a high surrogate is not checked
          // for being a low surrogate, whereas UTF16Decode consumes only one
          // unit for an unpaired high surrogate. Confirm this is intended.
00952     if (CS_UC_IS_HIGH_SURROGATE (*str))
00953       return (int)(MIN(maxSkip, (size_t)2));
00954     else
00955       return (int)(MIN(maxSkip, (size_t)1));
00956   }
00957   
00963   inline static int UTF16Rewind (const utf16_char* str, size_t maxRew)
00964   {
          // Returns how many UTF-16 code units lie between 'str' and the start
          // of the encoded character in front of it.
          //  - str:    position just after the character to rewind over;
          //            str[-1] (and possibly str[-2]) are read.
          //  - maxRew: upper limit for the result; 0 yields 0.
          // The result is 2 only for a well-formed pair, i.e. a low surrogate
          // preceded by a high surrogate; every other unit, including an
          // unpaired surrogate, counts as one character.
00965     if (maxRew < 1) return 0;
00966     
00967     const utf16_char* pos = str - 1;
          // Only a low surrogate can be the second half of a pair. Testing for
          // "any surrogate" made two consecutive high surrogates rewind as one
          // pair, which disagrees with how UTF16Decode steps forward.
00968     if (!CS_UC_IS_LOW_SURROGATE(*pos)) 
00969       return 1;
00970     else
00971     {
00972       if ((maxRew > 1) && (CS_UC_IS_HIGH_SURROGATE(*(pos - 1))))
00973         return 2;
00974       else
00975         return 1;
00976     }
00977   }
00978   
00984   inline static int UTF32Skip (const utf32_char* str, size_t maxSkip)
00985   {
          // Every UTF-32 character is one code unit, so the result is 1
          // clamped to 'maxSkip' (0 when maxSkip is 0). 'str' is not read.
00986     (void)str; // silence gcc
00987     return (int)(MIN(maxSkip, (size_t)1));
00988   }
00989 
00995   inline static int UTF32Rewind (const utf32_char* str, size_t maxRew)
00996   {
          // Every UTF-32 character is one code unit, so the result is 1, or 0
          // when 'maxRew' is 0. 'str' is not read.
00997     (void)str; // silence gcc
00998     if (maxRew < 1) return 0;
00999     return 1;
01000   }
01015   static size_t MapToUpper (const utf32_char ch, utf32_char* dest, 
01016     size_t destSize, uint flags = 0);
01021   static size_t MapToLower (const utf32_char ch, utf32_char* dest, 
01022     size_t destSize, uint flags = 0);
01028   static size_t MapToFold (const utf32_char ch, utf32_char* dest, 
01029     size_t destSize, uint flags = 0);
01031 };
01032 
01035 #endif
01036 

Generated for Crystal Space 1.2.1 by doxygen 1.5.3