muconvauto.cpp
gehe zur Dokumentation dieser Datei
00001 
00026 
00027 // Name:        src/common/convauto.cpp
00028 // Purpose:     implementation of wxConvAuto
00029 // Author:      Vadim Zeitlin
00030 // Created:     2006-04-04
00031 // RCS-ID:      $Id: muconvauto.cpp,v 1.5 2011-08-12 09:45:19 keinstein Exp $
00032 // Copyright:   (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
00033 // Licence:     wxWindows licence
00035 
00036 // ============================================================================
00037 // declarations
00038 // ============================================================================
00039 
00040 // ----------------------------------------------------------------------------
00041 // headers
00042 // ----------------------------------------------------------------------------
00043 
00044 // for compilers that support precompilation, includes "wx.h".
00045 #include "wx/wxprec.h"
00046 
00047 #ifdef __BORLANDC__
00048 #pragma hdrstop
00049 #endif
00050 
00051 #if wxUSE_WCHAR_T
00052 
00053 #ifndef WX_PRECOMP
00054 #endif //WX_PRECOMP
00055 
00056 #include "muconvauto.h"
00057 
00058 // ============================================================================
00059 // implementation
00060 // ============================================================================
00061 
00062 /* static */
00063 
00064 muConvAuto::BOMType muConvAuto::DetectBOM(const char *src, size_t srcLen)
00065 {
00066         DEBUGLOGTYPE(other,muConvAuto,_T("Detecting BOM at length %d"),srcLen);
00067         if (srcLen == wxNO_LEN) { // zero terminated string
00068                 for (srcLen = 0; src[srcLen] && srcLen <= 4; srcLen++); 
00069         }
00070         if ( srcLen < 2 ) {
00071                 // minimal BOM is 2 bytes so bail out immediately and simplify the code
00072                 // below which wouldn't need to check for length for UTF-16 cases
00073                 return BOM_None;
00074         }
00075 
00076         // examine the buffer for BOM presence
00077         //
00078         // see http://www.unicode.org/faq/utf_bom.html#BOM
00079         switch ( *(src++) ) {
00080         case '\0':
00081                 // could only be big endian UTF-32 (00 00 FE FF)
00082                 if ( srcLen >= 4 &&
00083                                 src[0] == '\0' &&
00084                                 src[1] == '\xfe' &&
00085                                 src[2] == '\xff' ) {
00086                         return BOM_UTF32BE;
00087                 }
00088                 break;
00089 
00090         case '\xfe':
00091                 // could only be big endian UTF-16 (FE FF)
00092                 if ( *src++ == '\xff' ) {
00093                         return BOM_UTF16BE;
00094                 }
00095                 break;
00096 
00097         case '\xff':
00098                 // could be either little endian UTF-16 or UTF-32, both start
00099                 // with FF FE
00100                 if ( *(src++) == '\xfe' ) {
00101                         return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
00102                                ? BOM_UTF32LE
00103                                : BOM_UTF16LE;
00104                 }
00105                 break;
00106 
00107         case '\xef':
00108                 // is this UTF-8 BOM (EF BB BF)?
00109                 if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' ) {
00110                         return BOM_UTF8;
00111                 }
00112                 break;
00113         }
00114 
00115         return BOM_None;
00116 }
00117 
00118 void muConvAuto::InitFromBOM(BOMType bomType)
00119 {
00120         DEBUGLOG(other,_T(""));
00121         m_consumedBOM = false;
00122 
00123         switch ( bomType ) {
00124         case BOM_UTF32BE:
00125                 m_conv = new wxMBConvUTF32BE;
00126                 m_ownsConv = true;
00127                 break;
00128 
00129         case BOM_UTF32LE:
00130                 m_conv = new wxMBConvUTF32LE;
00131                 m_ownsConv = true;
00132                 break;
00133 
00134         case BOM_UTF16BE:
00135                 m_conv = new wxMBConvUTF16BE;
00136                 m_ownsConv = true;
00137                 break;
00138 
00139         case BOM_UTF16LE:
00140                 m_conv = new wxMBConvUTF16LE;
00141                 m_ownsConv = true;
00142                 break;
00143 
00144         case BOM_UTF8:
00145                 m_conv = &wxConvUTF8;
00146                 m_ownsConv = false;
00147                 break;
00148 
00149         default:
00150                 wxFAIL_MSG( _T("unexpected BOM type") );
00151                 // fall through: still need to create something
00152 
00153         case BOM_None:
00154                 InitWithDefault();
00155                 m_consumedBOM = true; // as there is nothing to consume
00156         }
00157 }
00158 
00159 void muConvAuto::SkipBOM(const char **src, size_t *len) const
00160 {
00161         DEBUGLOG(other,_T(""));
00162         int ofs;
00163         switch ( m_bomType ) {
00164         case BOM_UTF32BE:
00165         case BOM_UTF32LE:
00166                 ofs = 4;
00167                 break;
00168 
00169         case BOM_UTF16BE:
00170         case BOM_UTF16LE:
00171                 ofs = 2;
00172                 break;
00173 
00174         case BOM_UTF8:
00175                 ofs = 3;
00176                 break;
00177 
00178         default:
00179                 wxFAIL_MSG( _T("unexpected BOM type") );
00180                 // fall through: still need to create something
00181 
00182         case BOM_None:
00183                 ofs = 0;
00184         }
00185 
00186         *src += ofs;
00187         if ( *len != wxNO_LEN )
00188                 *len -= ofs;
00189 }
00190 
00191 void muConvAuto::InitFromInput(const char **src, size_t *len)
00192 
00193 {
00194         DEBUGLOG(other,_T(""));
00195         m_bomType = DetectBOM(*src, *len);
00196         InitFromBOM(m_bomType);
00197         SkipBOM(src, len);
00198 }
00199 
00200 size_t muConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
00201                     const char *src, size_t srcLen) const
00202 {
00203         DEBUGLOG(other,_T(""));
00204         // we check BOM and create the appropriate conversion the first time we're
00205         // called but we also need to ensure that the BOM is skipped not only
00206         // during this initial call but also during the first call with non-NULL
00207         // dst as typically we're first called with NULL dst to calculate the
00208         // needed buffer size
00209         muConvAuto *self = wx_const_cast(muConvAuto *, this);
00210         if ( !m_conv ) {
00211                 self->InitFromInput(&src, &srcLen);
00212                 if ( dst )
00213                         self->m_consumedBOM = true;
00214         }
00215 
00216         if ( !m_consumedBOM && dst ) {
00217                 self->m_consumedBOM = true;
00218                 SkipBOM(&src, &srcLen);
00219         }
00220 
00221         size_t result = m_conv->ToWChar(dst, dstLen, src, srcLen);
00222         if (result != wxCONV_FAILED) return result;
00223 
00224         self->m_conv = m_fallback; // save for further use.
00225         self->m_ownsConv = false; // if we own fallback, it will be destroyed as fallback.
00226         return m_conv->ToWChar(dst, dstLen, src, srcLen);
00227 }
00228 
00229 size_t muConvAuto::FromWChar(char *dst, size_t dstLen,
00230                       const wchar_t *src, size_t srcLen) const
00231 {
00232         DEBUGLOG(other,_T(""));
00233         if ( !m_conv ) {
00234                 // default to UTF-8 for the multibyte output
00235                 wx_const_cast(muConvAuto *, this)->InitWithDefault();
00236         }
00237         return m_conv->FromWChar(dst, dstLen, src, srcLen);
00238 }
00239 
00240 #endif // wxUSE_WCHAR_T
00241 
// Global converter pointer, initialised to the ISO-8859-1 converter object.
// NOTE(review): judging by its name this is the default fallback converter
// handed to muConvAuto instances -- confirm against muconvauto.h.  It is
// defined after the "#endif // wxUSE_WCHAR_T" line, i.e. outside that guard.
00242 wxMBConv * muConvAutoFallback = &wxConvISO8859_1;
00243 

Erzeugt am Sun Aug 21 2011 10:51:56 für Mutabor von doxygen 1.7.4