SumProject/my_error/uni3.cpp

100 lines
2.6 KiB
C++
Raw Normal View History

2025-03-22 22:38:52 +08:00
#include <stdio.h>
#include <iostream>
using namespace std;
//void print_utf8_to_unicode(const char *str) {
// while (*str) {
// unsigned char c = *str;
// unsigned int code = 0;
//
// // Determine the sequence length from the UTF-8 lead byte
// if (c < 0x80) { // 1 byte
// code = c;
// str += 1;
// } else if ((c & 0xE0) == 0xC0) { // 2 bytes
// code = ((c & 0x1F) << 6) | (str[1] & 0x3F);
// str += 2;
// } else if ((c & 0xF0) == 0xE0) { // 3 bytes (typical for Chinese text)
// code = ((c & 0x0F) << 12) | ((str[1] & 0x3F) << 6) | (str[2] & 0x3F);
// str += 3;
// } else if ((c & 0xF8) == 0xF0) { // 4 bytes
// code = ((c & 0x07) << 18) | ((str[1] & 0x3F) << 12) | ((str[2] & 0x3F) << 6) | (str[3] & 0x3F);
// str += 4;
// } else {
// // Invalid lead byte
// code = c; // fall back to the raw byte value
// str++;
// }
//
// printf("U+%04X ", code);
// }
//}
/**
 * Convert a NUL-terminated UTF-8 string into 16-bit Unicode code units.
 *
 * Each code unit is stored as two bytes, low byte first (little-endian),
 * and the output is finished with a two-byte zero terminator.
 *
 * Only 1-, 2- and 3-byte UTF-8 sequences are accepted. 4-byte sequences,
 * stray continuation bytes and truncated sequences are reported as errors.
 * Overlong encodings and surrogate code points are not rejected.
 *
 * @param pInput  NUL-terminated UTF-8 text; it is only read.
 * @param pOutput Destination buffer. Its size cannot be checked here, so the
 *                caller must supply at least 2 bytes per input character
 *                plus 2 bytes for the terminator.
 * @return Number of bytes written, excluding the terminator, or -1 when a
 *         pointer is null or the input is malformed/unsupported. On failure
 *         the bytes converted so far stay in pOutput and no terminator is
 *         appended.
 */
int Utf82Unicode(char* pInput, char* pOutput)
{
    if (pInput == nullptr || pOutput == nullptr)
    {
        return -1;
    }

    // Do all bit arithmetic on unsigned bytes: plain char may be signed, and
    // shifting negative values is undefined before C++20.
    const unsigned char* in = reinterpret_cast<const unsigned char*>(pInput);
    unsigned char* out = reinterpret_cast<unsigned char*>(pOutput);
    int outputSize = 0;

    *out = 0;

    while (*in != 0)
    {
        const unsigned char lead = in[0];
        unsigned int codeUnit = 0;
        int sequenceLength = 0;

        if (lead <= 0x7F)
        {
            // Single-byte sequence (ASCII).
            codeUnit = lead;
            sequenceLength = 1;
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            // Two-byte sequence: 110xxxxx 10xxxxxx.
            const unsigned char trail = in[1];
            if ((trail & 0xC0) != 0x80)  // also catches the NUL terminator
            {
                return -1;
            }
            codeUnit = ((lead & 0x1Fu) << 6) | (trail & 0x3Fu);
            sequenceLength = 2;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
            const unsigned char middle = in[1];
            if ((middle & 0xC0) != 0x80)
            {
                // Validate before touching in[2]: if in[1] is the terminator,
                // in[2] lies outside the string.
                return -1;
            }
            const unsigned char trail = in[2];
            if ((trail & 0xC0) != 0x80)
            {
                return -1;
            }
            codeUnit = ((lead & 0x0Fu) << 12) | ((middle & 0x3Fu) << 6) | (trail & 0x3Fu);
            sequenceLength = 3;
        }
        else
        {
            // 4-byte sequences, stray continuation bytes, invalid lead bytes.
            return -1;
        }

        // Little-endian: low byte first, then high byte.
        out[0] = static_cast<unsigned char>(codeUnit & 0xFFu);
        out[1] = static_cast<unsigned char>((codeUnit >> 8) & 0xFFu);

        out += 2;
        in += sequenceLength;
        outputSize += 2;
    }

    // Two zero bytes terminate the 16-bit string.
    out[0] = 0;
    out[1] = 0;
    return outputSize;
}
/**
 * Demo entry point: prints the bytes of a UTF-8 string literal in hexadecimal.
 *
 * @return 0 always.
 */
int main() {
    // NOTE(review): the original comment on this line is mis-encoded; it
    // presumably reminded that this source file must be saved as UTF-8 — confirm.
    // NOTE(review): initialising a char array from a u8 literal is valid up to
    // C++17 only; from C++20 the literal has type const char8_t[].
    char text[] =u8"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>";
    //// print_utf8_to_unicode(text);
    // char outs[1005];
    // Utf82Unicode(text,outs);

    // The previous call passed the char array itself to %04X, which expects an
    // unsigned int; that mismatch is undefined behaviour. Print each byte of
    // the string instead, converted through unsigned char so bytes >= 0x80 are
    // not sign-extended.
    printf("str:");
    for (const char* p = text; *p != '\0'; ++p) {
        printf("%02X ", static_cast<unsigned int>(static_cast<unsigned char>(*p)));
    }
    return 0;
}