Convert Unicode <> UTF-8
Use MultiByteToWideChar and WideCharToMultiByte to convert a Unicode (UTF-16)
CStringW to a UTF-8 CStringA and back.
The data type CString is defined as CStringW when Unicode is selected in your
MS Visual C++ project settings. Newer versions of Visual C++ use Unicode by
default.
- CString (unicode project) / CStringW contains unicode characters (wchar_t) and the function GetBuffer returns a pointer to wchar_t
- CStringA contains single-byte or multi-byte (MBCS) characters (char) and the function GetBuffer returns a pointer to char
Convert Unicode (utf-16) CStringW to utf-8 CStringA:
///////////////////////////////////////////////////////////////////////////////
// Converts a UTF-16 CStringW into a UTF-8 encoded CStringA.
//
// uni: the UTF-16 source string.
// Returns the UTF-8 encoding of uni, or an empty string when uni is empty or
// the conversion fails.
CStringA ConvertUnicodeToUTF8(const CStringW& uni)
{
    CStringA utf8;

    const int srcLen = uni.GetLength();
    if (srcLen <= 0)
        return utf8; // nothing to do

    // Pass the explicit source length instead of -1: the API then neither
    // counts nor writes a \0 terminator, so the size reported here is exactly
    // the number of bytes the second call needs. (With -1 the second call
    // wants room for the terminator as well and fails with
    // ERROR_INSUFFICIENT_BUFFER when given only the payload size.)
    const int cc = WideCharToMultiByte(CP_UTF8, 0, uni.GetString(), srcLen,
                                       NULL, 0, NULL, NULL);
    if (cc <= 0)
        return utf8; // conversion not possible

    // convert
    char *buf = utf8.GetBuffer(cc);
    int written = 0;
    if (buf)
        written = WideCharToMultiByte(CP_UTF8, 0, uni.GetString(), srcLen,
                                      buf, cc, NULL, NULL);

    // The API did not terminate the buffer, so hand the exact length to
    // ReleaseBuffer rather than letting it search for a \0; a failed
    // conversion (written == 0) leaves the result empty.
    utf8.ReleaseBuffer(written > 0 ? written : 0);
    return utf8;
}
Convert utf-8 CStringA to Unicode (utf-16) CStringW
///////////////////////////////////////////////////////////////////////////////
// Converts a UTF-8 encoded CStringA into a UTF-16 CStringW.
//
// utf8: the UTF-8 source string.
// Returns the UTF-16 form of utf8, or an empty string when utf8 is empty or
// the conversion fails.
CStringW ConvertUTF8ToUnicode(const CStringA& utf8)
{
    CStringW uni;

    const int srcLen = utf8.GetLength();
    if (srcLen <= 0)
        return uni; // nothing to do

    // Pass the explicit source length instead of -1: the API then neither
    // counts nor writes a \0 terminator, so the size reported here is exactly
    // the number of wide characters the second call needs. (With -1 the second
    // call wants room for the terminator as well and fails with
    // ERROR_INSUFFICIENT_BUFFER when given only the payload size.)
    const int cc = MultiByteToWideChar(CP_UTF8, 0, utf8.GetString(), srcLen,
                                       NULL, 0);
    if (cc <= 0)
        return uni; // conversion not possible

    // convert
    wchar_t *buf = uni.GetBuffer(cc);
    int written = 0;
    if (buf)
        written = MultiByteToWideChar(CP_UTF8, 0, utf8.GetString(), srcLen,
                                      buf, cc);

    // The API did not terminate the buffer, so hand the exact length to
    // ReleaseBuffer rather than letting it search for a \0; a failed
    // conversion (written == 0) leaves the result empty.
    uni.ReleaseBuffer(written > 0 ? written : 0);
    return uni;
}