国产一级a片免费看高清,亚洲熟女中文字幕在线视频,黄三级高清在线播放,免费黄色视频在线看

打開APP
userphoto
未登錄

開通VIP,暢享免費(fèi)電子書等14項(xiàng)超值服

開通VIP
UTF8與GB2312之間的轉換
  相信一定有不少的程序開發人員時常會遇到字符編碼的問題,而這個問題也是非常讓人頭痛的。因為這些都是潛在的錯誤,要找出這些錯誤也得要有這方面的開發經驗才行。特別是在處理xml文檔時,該問題的出現就更加的頻繁了,有一次用java寫服務器端程序,用vc寫客戶端與之交互。交互的協議都是用xml寫的。結果在通訊時老是發現數據接收不正確。納悶!于是用抓取網絡數據包工具抓取數據,后來才發現原來是java上xml的頭是這樣的<?xml version="1.0" encoding="UTF-8"?>,而vc上默認的是GB2312。所以一遇到漢字數據就不正確了。去網上找資料,這方面的文章好象特別少,針對像這樣的問題,下面我介紹一下我自己寫的一個轉換程序。當然,程序很簡單。如果有畫蛇添足的地方,還望各位高手一笑了之。
  如果您對UTF-8、Unicode、GB2312等還是很陌生的話,請查看 http://www.linuxforum.net/books/UTF-8-Unicode.html ,我這里就不浪費口舌了。下面介紹一下WinAPI的兩個函數:WideCharToMultiByte、MultiByteToWideChar。

函數原型:
int WideCharToMultiByte(	UINT CodePage, // code page	DWORD dwFlags, // performance and mapping flags	LPCWSTR lpWideCharStr, // wide-character string	int cchWideChar, // number of chars in string	LPSTR lpMultiByteStr, // buffer for new string	int cbMultiByte, // size of buffer	LPCSTR lpDefaultChar, // default for unmappable chars	LPBOOL lpUsedDefaultChar // set when default char used); //將寬字符轉(zhuǎn)換成多個(gè)窄字符int MultiByteToWideChar(	UINT CodePage, // code page	DWORD dwFlags, // character-type options	LPCSTR lpMultiByteStr, // string to map	int cbMultiByte, // number of bytes in string	LPWSTR lpWideCharStr, // wide-character buffer	int cchWideChar // size of buffer);//將多個(gè)窄字符轉(zhuǎn)換成寬字符      
需要用到的一些函數:
CString CXmlProcess::HexToBin(CString string)//將16進(jìn)制數(shù)轉(zhuǎn)換成2進(jìn)制{	if( string == "0") return "0000";	if( string == "1") return "0001";	if( string == "2") return "0010";	if( string == "3") return "0011";	if( string == "4") return "0100";	if( string == "5") return "0101";	if( string == "6") return "0110";	if( string == "7") return "0111";	if( string == "8") return "1000";	if( string == "9") return "1001";	if( string == "a") return "1010";	if( string == "b") return "1011";	if( string == "c") return "1100";	if( string == "d") return "1101";	if( string == "e") return "1110";	if( string == "f") return "1111";	return "";}CString CXmlProcess::BinToHex(CString BinString)//將2進(jìn)制數(shù)轉(zhuǎn)換成16進(jìn)制{	if( BinString == "0000") return "0";	if( BinString == "0001") return "1";	if( BinString == "0010") return "2";	if( BinString == "0011") return "3";	if( BinString == "0100") return "4";	if( BinString == "0101") return "5";	if( BinString == "0110") return "6";	if( BinString == "0111") return "7";	if( BinString == "1000") return "8";	if( BinString == "1001") return "9";	if( BinString == "1010") return "a";	if( BinString == "1011") return "b";	if( BinString == "1100") return "c";	if( BinString == "1101") return "d";	if( BinString == "1110") return "e";	if( BinString == "1111") return "f";	return "";}int CXmlProcess::BinToInt(CString string)//2進(jìn)制字符數(shù)據(jù)轉(zhuǎn)換成10進(jìn)制整型{	int len =0;	int tempInt = 0;	int strInt = 0;	for(int i =0 ;i < string.GetLength() ;i ++)	{	        tempInt = 1;	        strInt = (int)string.GetAt(i)-48;	        for(int k =0 ;k < 7-i ; k++)	        {			tempInt = 2*tempInt;	        }	        len += tempInt*strInt;	}	return len;}      
  UTF-8轉換成GB2312:先把UTF-8轉換成Unicode,然后再把Unicode通過函數WideCharToMultiByte轉換成GB2312。
WCHAR* CXmlProcess::UTF_8ToUnicode(char *ustart)  //把UTF-8轉(zhuǎn)換成Unicode{	char char_one;	char char_two;	char char_three;	int Hchar;	int Lchar;	char uchar[2];	WCHAR *unicode;	CString string_one;	CString string_two;	CString string_three;	CString combiString;	char_one = *ustart;	char_two = *(ustart+1);	char_three = *(ustart+2);	string_one.Format("%x",char_one);	string_two.Format("%x",char_two);	string_three.Format("%x",char_three);	string_three = string_three.Right(2);	string_two = string_two.Right(2);	string_one = string_one.Right(2);	string_three = HexToBin(string_three.Left(1))+HexToBin(string_three.Right(1));	string_two = HexToBin(string_two.Left(1))+HexToBin(string_two.Right(1));	string_one = HexToBin(string_one.Left(1))+HexToBin(string_one.Right(1));	combiString = string_one +string_two +string_three;	combiString = combiString.Right(20);	combiString.Delete(4,2);	combiString.Delete(10,2);	Hchar = BinToInt(combiString.Left(8));	Lchar = BinToInt(combiString.Right(8));	uchar[1] = (char)Hchar;	uchar[0] = (char)Lchar;	unicode = (WCHAR *)uchar;	return unicode;}char * CXmlProcess::UnicodeToGB2312(unsigned short uData)  //把Unicode 轉(zhuǎn)換成 GB2312{	char *buffer ;	buffer = new char[sizeof(WCHAR)];	WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,sizeof(WCHAR),NULL,NULL);	return buffer;}      

  GB2312轉換成UTF-8:先把GB2312通過函數MultiByteToWideChar轉換成Unicode,然后再把Unicode拆開后拼裝成UTF-8。

WCHAR * CXmlProcess::Gb2312ToUnicode(char *gbBuffer)  //GB2312 轉(zhuǎn)換成 Unicode{ 	WCHAR *uniChar;	uniChar = new WCHAR[1];	::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,uniChar,1);	return uniChar;}char * CXmlProcess::UnicodeToUTF_8(WCHAR *UniChar) // Unicode 轉(zhuǎn)換成UTF-8{	char *buffer;	CString strOne;	CString strTwo;	CString strThree;	CString strFour;	CString strAnd;	buffer = new char[3];	int hInt,lInt;	hInt = (int)((*UniChar)/256);	lInt = (*UniChar)%256;	CString string ;	string.Format("%x",hInt);	strTwo = HexToBin(string.Right(1));	string = string.Left(string.GetLength() - 1);	strOne = HexToBin(string.Right(1));	string.Format("%x",lInt);	strFour = HexToBin(string.Right(1));	string = string.Left(string.GetLength() -1);	strThree = HexToBin(string.Right(1));	strAnd = strOne +strTwo + strThree + strFour;	strAnd.Insert(0,"1110");	strAnd.Insert(8,"10");	strAnd.Insert(16,"10");	strOne = strAnd.Left(8);	strAnd = strAnd.Right(16);	strTwo = strAnd.Left(8);	strThree = strAnd.Right(8);	*buffer = (char)BinToInt(strOne);	buffer[1] = (char)BinToInt(strTwo);	buffer[2] = (char)BinToInt(strThree);	return buffer;}     
例子:將GB2312轉換成UTF-8的調用:
char * CXmlProcess::translateCharToUTF_8(char *xmlStream, int len) {	int newCharLen =0 ;	int oldCharLen = 0;	int revCharLen = len;	char* newCharBuffer;	char* finalCharBuffer;	char *buffer ;	CString string;	buffer  = new char[sizeof(WCHAR)];	newCharBuffer = new char[int(1.5*revCharLen)];//設(shè)置最大的一個(gè)緩沖區(qū)	while(oldCharLen < revCharLen)	{		if( *(xmlStream + oldCharLen) >= 0)		{			*(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);			newCharLen ++;			oldCharLen ++;		}//如果是英文直接復(fù)制就可以		else		{			WCHAR *pbuffer = this->Gb2312ToUnicode(xmlStream+oldCharLen);			buffer = this->UnicodeToUTF_8(pbuffer);			*(newCharBuffer+newCharLen) = *buffer;			*(newCharBuffer +newCharLen +1) = *(buffer + 1);			*(newCharBuffer +newCharLen +2) = *(buffer + 2);			newCharLen += 3;			oldCharLen += 2;		}	}	newCharBuffer[newCharLen] = ''\0'';	CString string1 ;	string1.Format("%s",newCharBuffer);	finalCharBuffer = new char[newCharLen+1];	memcpy(finalCharBuffer,newCharBuffer,newCharLen+1);	return finalCharBuffer;}
  程序都非常的簡單,由于實在太窮,已經吃了兩天的方便面,所以現在頭昏,程序的詳細說明就不寫了。程序員到了像我這樣的地步也真是少見。工資低沒有辦法。哎!!!!



最新評(píng)論 [發(fā)表評(píng)論] [文章投稿]
查看所有評(píng)論
推薦給好友
打印

/*
字符串編碼轉(zhuǎn)換 GBK to UTF8 (ansi版)
xmwen@126.com
*/
char *gbk2utf8(const char *strGBK){
int len;
wchar_t *strUnicode;
char *strUTF8;
if (!strGBK){return NULL;}
len = MultiByteToWideChar(CP_GBK, 0,strGBK, -1, NULL,0);
if (len <1){return NULL;}
strUnicode = (wchar_t *) malloc(sizeof(wchar_t) * len);
if (!strUnicode){return NULL;}
len = MultiByteToWideChar(CP_GBK, 0, strGBK, -1, strUnicode, len);
if (len<1){free(strUnicode);return NULL;}
len = WideCharToMultiByte(CP_UTF8, 0, strUnicode, -1, NULL, 0, NULL, NULL);
if (len<1){free(strUnicode);return NULL;}
strUTF8 = (char *) malloc(sizeof(char) * len);
if (!strUTF8){free(strUnicode);return NULL;}
len = WideCharToMultiByte (CP_UTF8, 0, strUnicode, -1, strUTF8, len, NULL,NULL);
free(strUnicode);
if (len<1){free(strUTF8);return NULL;}
return strUTF8;
} ( xmwen 發(fā)表于 2009-11-3 19:38:00)
 
[ 原創(chuàng)文檔 本文適合中級(jí)讀者 已閱讀34485次 ]

搞笑,這種害人害己的文章還有這么多人訪問。

作者光知道 WideCharToMultiByte 可以把 Unicode 轉(zhuǎn)成 GB2312 就不知道也可以把 Unicode 轉(zhuǎn)換為 UTF-8 嗎?

其實(shí)這是一個(gè)很簡(jiǎn)單的程序,都被作者搞復(fù)雜了。

要實(shí)現(xiàn) GB2312 (其實(shí)是GBK)轉(zhuǎn)換為 UTF-8 其實(shí)很簡(jiǎn)單,先用 MultiByteToWideChar 把 GB2312 轉(zhuǎn)換為 Unicode,再用 WideCharToMultiByte 把 Unicode 轉(zhuǎn)換為 UTF-8 就可以了。

UTF-8 轉(zhuǎn)換為 GB2312 是個(gè)相反的過程,先用 MultiByteToWideChar 把 UTF-8 轉(zhuǎn)換為 Unicode,再用 WideCharToMultiByte 把 Unicode 轉(zhuǎn)換為 GB2312 就可以了。 ( 雁過留聲 發(fā)表于 2007-1-11 9:11:00)
 
translateCharToUTF_8的編碼不對(duì),
請(qǐng)作者檢查一下,
如: "你是我的好朋友"
轉(zhuǎn)換成了;"浣犳槸鎴戠殑濂芥i脲弸鍚?"
正確的應(yīng)是:
"浣犳槸鎴戠殑濂芥湅鍙嬪悧"
對(duì)于有的編碼還能對(duì)...
交流一下:kudoo.aos@gmail.com
( kudoo 發(fā)表于 2006-8-20 19:46:00)
 
shines在2005-2-6,提供了一段程序,里面有
buffersize = WideCharToMultiByte(CP_UTF8, MB_PRECOMPOSED, unicode, wide_size, NULL, 0, NULL, 0);
    buffer = new char[buffersize+1];

但是,我在調(diào)試的時(shí)候發(fā)現(xiàn):buffersize似乎已經(jīng)預(yù)先留了‘\0’的位置,或者是不是我出錯(cuò)了
比如:“i love you,愛”GB2312是需要14個(gè)字節(jié)
UTF8是需要15個(gè)字節(jié),返回時(shí)候就是這些了啊,
我的地址是:robin-fox@sohu.com,
誰能回答以下,感謝?。?( robin_fox_nan 發(fā)表于 2006-3-19 20:20:00)
 
暈.格式?jīng)]有了
原文請(qǐng)看
http://www.kbadboy.com/viewfull.asp?id=33 ( 鬼龍之舞 發(fā)表于 2005-8-25 16:13:00)
 
支持樓主!是因?yàn)槟阄也艑懗鰜淼?不管是在體積還是在速度,相信都比樓主的強(qiáng)一點(diǎn),如果不考慮移植性的話
感謝樓主!!

; UTF8toUnicode: reads bytes from lpszUTF8_IN until a zero byte and writes
; 16-bit code units (low byte first) to lpszBuf_OUT, ending with a zero word.
; A byte with the high bit set is always treated as the first byte of a
; three-byte sequence; a non-zero byte without it is widened to 16 bits.
; NOTE(review): two-byte and four-byte UTF-8 sequences are not recognised and
; continuation bytes are not validated -- confirm inputs are limited to ASCII
; plus three-byte sequences.
UTF8toUnicode proc uses esi edi lpszBuf_OUT,lpszUTF8_IN
mov esi,lpszUTF8_IN
mov edi,lpszBuf_OUT
.while TRUE 
mov al,[esi]
; high bit set: decode bytes [esi], [esi+1], [esi+2]
.if sbyte ptr al <0
; high output byte = (byte0 low 4 bits << 4) | (bits 5..2 of byte1)
mov al,[esi]
and al,00001111b
shl al,4
mov [edi+1],al
mov al,[esi+1]
and al,00111100b
shr al,2
or [edi+1],al

; low output byte = (byte1 low 2 bits << 6) | (byte2 low 6 bits)
mov al,[esi+1]
and al,11b
shl al,6
mov [edi+0],al
mov al,[esi+2]
and al,00111111b
or  [edi+0],al
; consumed 3 input bytes, produced 1 code unit (2 bytes)
add edi,2
add esi,3
; non-zero byte without the high bit: store it zero-extended to a word
.elseif al
xor ah,ah
stosw
inc esi
; zero byte: write the terminating zero word and stop
.else
mov WORD ptr [edi],0
.break
.endif
.endw
ret
UTF8toUnicode endp ( 鬼龍之舞 發(fā)表于 2005-8-25 16:11:00)
 
; UnicodetoUTF8: reads 16-bit code units from lpszUTF8_IN (despite its name,
; this parameter is the UTF-16 input) until a zero word, and writes the UTF-8
; encoding to lpBuf_OUT.
;   code unit <  80h  -> 1 byte   0xxxxxxx
;   code unit < 800h  -> 2 bytes  110xxxxx 10xxxxxx
;   otherwise         -> 3 bytes  1110xxxx 10xxxxxx 10xxxxxx
; The terminator is written as a zero word (two zero bytes), as before.
; Fixes over the previous version: the one-byte case used stosw, which put a
; zero byte after every ASCII character; code units 80h..FFh were emitted as a
; raw byte; and code units 100h..7FFh got an over-long three-byte form.
; The output never needs more space than the previous version used.
; Surrogate pairs are not combined; each half is encoded on its own.
; Relies on the direction flag being clear for stosb/stosw.
UnicodetoUTF8 proc  uses esi edi lpBuf_OUT,lpszUTF8_IN
mov esi,lpszUTF8_IN
mov edi,lpBuf_OUT
.while TRUE
mov ax,[esi]
.if ax==0
; end of input: write the terminator and stop
stosw
.break
.elseif ax < 80h
; ASCII: exactly one output byte
stosb
add esi,2
.elseif ax < 800h
; first byte: 110 + bits 10..6
shr ax,6
or  al,11000000b
stosb
; second byte: 10 + bits 5..0
mov al,[esi+0]
and al,00111111b
or  al,10000000b
stosb
add esi,2
.else
; first byte: 1110 + bits 15..12
mov al,[esi+1]
shr al,4
or  al,11100000b
mov [edi+0],al

; second byte: 10 + bits 11..6
mov al,[esi+1]
and al,00001111b
shl al,2
or  al,10000000b
mov ah,[esi+0]
shr ah,6
or  al,ah
mov [edi+1],al

; third byte: 10 + bits 5..0
mov al,[esi+0]
and al,00111111b
or  al,10000000b
mov [edi+2],al

add edi,3
add esi,2
.endif
.endw
ret
UnicodetoUTF8 endp ( 鬼龍之舞 發(fā)表于 2005-8-25 16:11:00)
 
幫忙弄以下 ( zztop5384 發(fā)表于 2005-4-18 10:35:00)
 
int WideCharToMultiByte(
UINT CodePage, // code page
DWORD dwFlags, // performance and mapping flags
LPCWSTR lpWideCharStr, // wide-character string
int cchWideChar, // number of chars in string
LPSTR lpMultiByteStr, // buffer for new string
int cbMultiByte, // size of buffer
LPCSTR lpDefaultChar, // default for unmappable chars
LPBOOL lpUsedDefaultChar // set when default char used
); // converts a wide-character string into a multibyte (narrow) string

這些只是函數(shù)原型,并沒有具體實(shí)現(xiàn) ( zztop5384 發(fā)表于 2005-4-18 10:27:00)
 
//對(duì)不起,少加了個(gè)擴(kuò)號(hào)
// Decodes one three-byte UTF-8 sequence (1110xxxx 10xxxxxx 10xxxxxx) starting
// at pText into a single UTF-16 code unit using bit operations.
//
// Returns a pointer to a function-local static WCHAR holding the result. The
// value is overwritten by the next call, so copy it out immediately; the
// function is not reentrant or thread-safe. (The previous version returned the
// address of a local array, which dangled after return, and did not compile:
// it assigned to undeclared char_one/char_two/char_three and had a stray ')'.)
//
// The lead/continuation markers are not validated. A NULL pText yields a
// pointer to 0.
WCHAR* CXmlProcess::UTF_8ToUnicode(char *pText)
{
	static WCHAR unicode = 0;

	if (pText == NULL)
	{
		unicode = 0;
		return &unicode;
	}

	// Work on unsigned bytes so that bytes >= 0x80 are not sign-extended.
	const unsigned int first  = static_cast<unsigned char>(pText[0]);
	const unsigned int second = static_cast<unsigned char>(pText[1]);
	const unsigned int third  = static_cast<unsigned char>(pText[2]);

	// High byte: low nibble of byte 0, then bits 5..2 of byte 1.
	const unsigned int highByte = ((first & 0x0Fu) << 4) | ((second >> 2) & 0x0Fu);
	// Low byte: low 2 bits of byte 1, then low 6 bits of byte 2.
	const unsigned int lowByte = ((second & 0x03u) << 6) | (third & 0x3Fu);

	unicode = static_cast<WCHAR>((highByte << 8) | lowByte);
	return &unicode;

}
本站僅提供存儲(chǔ)服務(wù),所有內(nèi)容均由用戶發(fā)布,如發(fā)現(xiàn)有害或侵權(quán)內(nèi)容,請(qǐng)點(diǎn)擊舉報(bào)。
打開APP,閱讀全文并永久保存 查看更多類似文章
猜你喜歡
類似文章
CString與utf
Unicode,UTF8互轉(zhuǎn)
C++中 Unicode 與 UTF
請(qǐng)問unsigned short如何轉(zhuǎn)成CString?
QString CString char三者之轉(zhuǎn)換集錦
CString 轉(zhuǎn)換為 string
更多類似文章 >>
生活服務(wù)
分享 收藏 導(dǎo)長圖 關(guān)注 下載文章
綁定賬號(hào)成功
后續(xù)可登錄賬號(hào)暢享VIP特權(quán)!
如果VIP功能使用有故障,
可點(diǎn)擊這里聯(lián)系客服!

聯(lián)系客服