Windows下用std::wifstream读取Unicode(UTF-16)和UTF-8文本

Windows下使用std::wifstream读取Unicode(即UTF-16编码)文本的方法:

std::locale loc("chs");				//windows下ok
	std::wcout.imbue(loc);
	// open as a byte stream
	std::wifstream wif("路径", std::ios::binary);
	std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>* codecvtToUnicode = new std::codecvt_utf16 < wchar_t, 0x10ffff, std::consume_header >;
	if (wif.is_open())
	{
		// apply BOM-sensitive UTF-16 facet
		wif.imbue(std::locale(wif.getloc(), codecvtToUnicode));
		std::wstring wline;
		while (std::getline(wif, wline))
		{
			std::wstring convert;
			for (auto c : wline)
			{
				if (c != L'\0' && c != L'?')
					convert += c;
			}
			wcout << convert << endl;
		}
		wif.close();
		//delete codecvtToUnicode;     //new和delete,应该不用手动delete,在哪里delete都会崩溃(亲测)
	}

 

Windows下使用std::wifstream读取UTF-8文本的方法:

std::locale loc("chs");				//windows下ok
	std::wcout.imbue(loc);
	// open as a byte stream
	std::wifstream wif("路径", std::ios::binary);
	std::codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>* codecvtToUnicode = new std::codecvt_utf8 < wchar_t, 0x10ffff, std::consume_header >;
	if (wif.is_open())
	{
		// apply BOM-sensitive UTF-8 facet
		wif.imbue(std::locale(wif.getloc(), codecvtToUnicode));
		std::wstring wline;
		while (std::getline(wif, wline))
		{
			std::wstring convert;
			for (auto c : wline)
			{
				if (c != L'\0' && c != L'?')
					convert += c;
			}
			wcout << convert << endl;
		}
		wif.close();
		//delete codecvtToUnicode;     //new和delete,应该不用手动delete,在哪里delete都会崩溃(亲测)
	}

 

 

发表评论

电子邮件地址不会被公开。 必填项已用*标注