// SumProject/my_error/uni3.cpp

#include <stdio.h>
#include <iostream>
using namespace std;
//void print_utf8_to_unicode(const char *str) {
//    while (*str) {
//        unsigned char c = *str;
//        unsigned int code = 0;
//
//        // 根据UTF-8编码规则解析字节
//        if (c < 0x80) {          // 1字节
//            code = c;
//            str += 1;
//        } else if ((c & 0xE0) == 0xC0) {  // 2字节
//            code = ((c & 0x1F) << 6) | (str[1] & 0x3F);
//            str += 2;
//        } else if ((c & 0xF0) == 0xE0) {  // 3字节（中文常用）
//            code = ((c & 0x0F) << 12) | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F);
//            str += 3;
//        } else if ((c & 0xF8) == 0xF0) {  // 4字节
//            code = ((c & 0x07) << 18) | ((str[1] & 0x3F) << 12) | ((str[2] & 0x3F) << 6) | (str[3] & 0x3F);
//            str += 4;
//        } else {
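//            // Invalid lead byte: pass the raw byte value through unchanged
//            code = c;  // NOTE: this is the raw byte, not U+FFFD (the Unicode replacement character)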
//            str++;
//        }
//
//        printf("U+%04X ", code);
//    }
//}

/**
 * Converts a NUL-terminated UTF-8 string into little-endian 16-bit code units.
 *
 * Only one-, two- and three-byte UTF-8 sequences (code points up to U+FFFF)
 * are handled; a four-byte lead byte, a stray continuation byte, or a
 * truncated/malformed sequence is reported as an error.
 *
 * NOTE: overlong encodings and encoded surrogates are not rejected; they are
 * decoded arithmetically like any other two-/three-byte sequence.
 *
 * @param pInput   NUL-terminated UTF-8 input. Not modified.
 * @param pOutput  Destination buffer. Each input character produces two bytes
 *                 (low byte first) and a two-byte zero terminator is appended,
 *                 so 2 * strlen(pInput) + 2 bytes is always sufficient.
 * @return Number of bytes written to pOutput, excluding the two-byte
 *         terminator, or -1 if either pointer is null or the input is not
 *         convertible. On a conversion error the characters converted so far
 *         are left in pOutput, followed by the two-byte terminator.
 */
int Utf82Unicode(char* pInput, char* pOutput)
{
	if (pInput == nullptr || pOutput == nullptr)
	{
		return -1;
	}

	// Work on unsigned bytes: plain char may be signed, and shifting or
	// comparing negative values would give wrong or undefined results.
	const unsigned char* in = reinterpret_cast<const unsigned char*>(pInput);
	unsigned char* out = reinterpret_cast<unsigned char*>(pOutput);

	int outputSize = 0; // bytes written so far, terminator excluded
	bool ok = true;

	while (*in != 0)
	{
		const unsigned char lead = in[0];
		unsigned int unit = 0; // decoded 16-bit code unit

		if (lead <= 0x7F) // single byte (ASCII)
		{
			unit = lead;
			in += 1;
		}
		else if ((lead & 0xE0) == 0xC0) // two-byte sequence: 110xxxxx 10xxxxxx
		{
			const unsigned char trail = in[1];
			if ((trail & 0xC0) != 0x80)
			{
				ok = false;
				break;
			}
			unit = ((lead & 0x1Fu) << 6) | (trail & 0x3Fu);
			in += 2;
		}
		else if ((lead & 0xF0) == 0xE0) // three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
		{
			// Validate the second byte before touching the third: if the
			// string ends right after the lead byte, in[1] is the NUL
			// terminator and in[2] would lie outside the string.
			const unsigned char mid = in[1];
			if ((mid & 0xC0) != 0x80)
			{
				ok = false;
				break;
			}
			const unsigned char last = in[2];
			if ((last & 0xC0) != 0x80)
			{
				ok = false;
				break;
			}
			unit = ((lead & 0x0Fu) << 12) | ((mid & 0x3Fu) << 6) | (last & 0x3Fu);
			in += 3;
		}
		else // four-byte lead bytes and stray continuation bytes are unsupported
		{
			ok = false;
			break;
		}

		// Little-endian: low byte at the lower address.
		out[outputSize] = static_cast<unsigned char>(unit & 0xFFu);
		out[outputSize + 1] = static_cast<unsigned char>((unit >> 8) & 0xFFu);
		outputSize += 2;
	}

	// The output string always ends with two zero bytes, also on error.
	out[outputSize] = 0;
	out[outputSize + 1] = 0;

	return ok ? outputSize : -1;
}

/**
 * Demo entry point: prints the UTF-8 bytes of a sample string in hexadecimal.
 */
int main() {
    // The source file must be saved as UTF-8. Before C++20 a u8 literal has
    // element type char, which is what this initialisation relies on.
    char text[] =u8"你好";

    // Print every byte as two hex digits. The cast to unsigned char keeps
    // bytes >= 0x80 from being sign-extended when char is signed; passing the
    // array itself to %X would be undefined behaviour (pointer vs. unsigned int).
    printf("str:");
    for (const char* p = text; *p != '\0'; ++p) {
        printf("%02X ", static_cast<unsigned int>(static_cast<unsigned char>(*p)));
    }
    printf("\n");
    return 0;
}