问题1:
如果你的文件带有字节顺序标记(BOM),你可以检测到它——它就是你发现的那段字节序列。在谷歌和Stack Overflow上搜索就能找到相关信息。至于“不那么丑陋”的方案:你可以重构并整理你的代码,例如编写一个用于检测BOM的函数,在开头调用它,然后根据检测结果调用fopen或_tfopen。之后你还可以进一步重构,编写自己的fopen封装函数。但它仍然会很丑陋。
问题2:
是的,但是Linux上的Unicode函数名称与Windows上的不一定相同。请使用宏定义(#define)来统一这些名称。也许可以编写自己的TCHAR.H。
问题3:
setlocale(LC_ALL, "en_US.UTF-8")
详见:man 3 setlocale
问题4:
只需使用fwprintf。
另一个函数不属于标准。
您可以使用wxWidgets工具包。
它使用unicode,并且使用在Windows和Linux和Unix和Mac上实现相同事物的类。
对于您来说更好的问题是如何将ASCII转换为Unicode,反之亦然。
操作如下:
/**
 * @brief Converts a wide-character string to a multibyte (narrow) string.
 *
 * The conversion is performed by wcstombs() and therefore uses the encoding
 * of the current C locale (LC_CTYPE). Despite the function's name, the output
 * is not restricted to ASCII: under a UTF-8 locale it is UTF-8.
 *
 * @param wstrStringToConvert Wide string to convert. Conversion stops at the
 *                            first embedded L'\0', if any.
 * @return The multibyte representation of the input. An empty string is
 *         returned when the input is empty or when it contains a character
 *         that cannot be represented in the current locale.
 */
std::string Unicode2ASCII( std::wstring wstrStringToConvert )
{
    const size_t wideLength = wstrStringToConvert.length();
    if (0 == wideLength)
        return "";

    // A single wide character may expand to as many as MB_CUR_MAX bytes,
    // so size the buffer for the worst case instead of one byte per character.
    std::string narrow(wideLength * MB_CUR_MAX, '\0');

    const size_t bytesWritten =
        wcstombs(&narrow[0], wstrStringToConvert.c_str(), narrow.size());

    // wcstombs reports an unconvertible character with (size_t)-1; the buffer
    // contents are unspecified in that case, so do not return them.
    if (bytesWritten == static_cast<size_t>(-1))
        return "";

    // Keep only the bytes actually produced by the conversion.
    narrow.resize(bytesWritten);
    return narrow;
}
/**
 * @brief Converts a multibyte (narrow) string to a wide-character string.
 *
 * The conversion is performed by mbstowcs() and therefore interprets the
 * input using the encoding of the current C locale (LC_CTYPE). Despite the
 * function's name, the input is not restricted to ASCII: under a UTF-8
 * locale it is decoded as UTF-8.
 *
 * @param strStringToConvert Multibyte string to convert. Conversion stops at
 *                           the first embedded '\0', if any.
 * @return The wide-character representation of the input. An empty string is
 *         returned when the input is empty or when it contains a byte
 *         sequence that is invalid in the current locale.
 */
std::wstring ASCII2Unicode( std::string strStringToConvert )
{
    const size_t byteLength = strStringToConvert.length();
    if (0 == byteLength)
        return L"";

    // Every wide character consumes at least one input byte, so the byte
    // count is an upper bound on the number of wide characters produced.
    std::wstring wide(byteLength, L'\0');

    const size_t charsWritten =
        mbstowcs(&wide[0], strStringToConvert.c_str(), wide.size());

    // mbstowcs reports an invalid multibyte sequence with (size_t)-1; the
    // buffer contents are unspecified in that case, so do not return them.
    if (charsWritten == static_cast<size_t>(-1))
        return L"";

    // Keep only the wide characters actually produced by the conversion.
    wide.resize(charsWritten);
    return wide;
}
编辑:
以下是关于Linux(wchar.h)上可用的Unicode函数的一些见解:
/* Declarations of the wide-character string copy, concatenation and
   comparison functions, enclosed in the __BEGIN_NAMESPACE_STD /
   __END_NAMESPACE_STD macro pair.  */
__BEGIN_NAMESPACE_STD
/* Copy SRC to DEST.  */
extern wchar_t *wcscpy (wchar_t *__restrict __dest,
__const wchar_t *__restrict __src) __THROW;
/* Copy no more than N wide characters of SRC to DEST.  */
extern wchar_t *wcsncpy (wchar_t *__restrict __dest,
__const wchar_t *__restrict __src, size_t __n)
__THROW;
/* Append SRC onto DEST.  */
extern wchar_t *wcscat (wchar_t *__restrict __dest,
__const wchar_t *__restrict __src) __THROW;
/* Append no more than N wide characters of SRC onto DEST.  */
extern wchar_t *wcsncat (wchar_t *__restrict __dest,
__const wchar_t *__restrict __src, size_t __n)
__THROW;
/* Compare S1 and S2; declared with __attribute_pure__.  */
extern int wcscmp (__const wchar_t *__s1, __const wchar_t *__s2)
__THROW __attribute_pure__;
/* Compare at most N wide characters of S1 and S2; declared with
   __attribute_pure__.  */
extern int wcsncmp (__const wchar_t *__s1, __const wchar_t *__s2, size_t __n)
__THROW __attribute_pure__;
__END_NAMESPACE_STD
/* Case-insensitive wide-string comparison; only declared when
   __USE_XOPEN2K8 is defined.  */
#ifdef __USE_XOPEN2K8
/* Compare S1 and S2, ignoring case.  */
extern int wcscasecmp (__const wchar_t *__s1, __const wchar_t *__s2) __THROW;
/* Compare no more than N wide characters of S1 and S2, ignoring case.  */
extern int wcsncasecmp (__const wchar_t *__s1, __const wchar_t *__s2,
size_t __n) __THROW;
/* The _l variants below take an explicit __locale_t argument (LOC).  */
# include <xlocale.h>
extern int wcscasecmp_l (__const wchar_t *__s1, __const wchar_t *__s2,
__locale_t __loc) __THROW;
extern int wcsncasecmp_l (__const wchar_t *__s1, __const wchar_t *__s2,
size_t __n, __locale_t __loc) __THROW;
#endif
/* Wide-string to number conversions that take an explicit __locale_t
   argument (LOC).  NPTR is the wide string to parse and ENDPTR is an
   output pointer; the integer variants additionally take a numeric BASE.
   NOTE(review): the preprocessor condition guarding these declarations
   is not part of this excerpt.  */
extern long int wcstol_l (__const wchar_t *__restrict __nptr,
wchar_t **__restrict __endptr, int __base,
__locale_t __loc) __THROW;
extern unsigned long int wcstoul_l (__const wchar_t *__restrict __nptr,
wchar_t **__restrict __endptr,
int __base, __locale_t __loc) __THROW;
/* The long long variants are marked __extension__.  */
__extension__
extern long long int wcstoll_l (__const wchar_t *__restrict __nptr,
wchar_t **__restrict __endptr,
int __base, __locale_t __loc) __THROW;
__extension__
extern unsigned long long int wcstoull_l (__const wchar_t *__restrict __nptr,
wchar_t **__restrict __endptr,
int __base, __locale_t __loc)
__THROW;
/* Floating-point variants: no BASE parameter.  */
extern double wcstod_l (__const wchar_t *__restrict __nptr,
wchar_t **__restrict __endptr, __locale_t __loc)
__THROW;
extern float wcstof_l (__const wchar_t *__restrict __nptr,
wchar_t **__restrict __endptr, __locale_t __loc)
__THROW;
extern long double wcstold_l (__const wchar_t *__restrict __nptr,
wchar_t **__restrict __endptr,
__locale_t __loc) __THROW;
/* wcpcpy / wcpncpy take the same parameters as wcscpy / wcsncpy.
   NOTE(review): in glibc these return a pointer to the end of the copied
   string rather than to DEST -- confirm against the glibc manual.  */
extern wchar_t *wcpcpy (wchar_t *__restrict __dest,
__const wchar_t *__restrict __src) __THROW;
extern wchar_t *wcpncpy (wchar_t *__restrict __dest,
__const wchar_t *__restrict __src, size_t __n)
__THROW;
#endif
/* open_wmemstream is only declared when __USE_XOPEN2K8 is defined.  It
   returns a stream (__FILE *) and takes two output pointers: BUFLOC
   (wchar_t **) and SIZELOC (size_t *).
   NOTE(review): presumably the wide-character counterpart of
   open_memstream -- confirm against the glibc manual.  */
#ifdef __USE_XOPEN2K8
extern __FILE *open_wmemstream (wchar_t **__bufloc, size_t *__sizeloc) __THROW;
#endif
#if defined __USE_ISOC95 || defined __USE_UNIX98
__BEGIN_NAMESPACE_STD
/* Stream orientation and wide-character formatted I/O declarations.
   In this excerpt the declarations taking or implying a stream (fwprintf,
   wprintf, vfwprintf, vwprintf, fwscanf, wscanf) carry no __THROW, while
   fwide and the buffer-based variants (swprintf, vswprintf, swscanf) do.  */
/* Select orientation for stream FP according to MODE.  */
extern int fwide (__FILE *__fp, int __mode) __THROW;
/* Write formatted output to STREAM.  */
extern int fwprintf (__FILE *__restrict __stream,
__const wchar_t *__restrict __format, ...)
;
/* Write formatted output to stdout.  */
extern int wprintf (__const wchar_t *__restrict __format, ...)
;
/* Write formatted output of at most N wide characters to S.  */
extern int swprintf (wchar_t *__restrict __s, size_t __n,
__const wchar_t *__restrict __format, ...)
__THROW ;
/* Write formatted output to stream S from argument list ARG.  */
extern int vfwprintf (__FILE *__restrict __s,
__const wchar_t *__restrict __format,
__gnuc_va_list __arg)
;
/* Write formatted output to stdout from argument list ARG.  */
extern int vwprintf (__const wchar_t *__restrict __format,
__gnuc_va_list __arg)
;
/* Write formatted output of at most N wide characters to S from
   argument list ARG.  */
extern int vswprintf (wchar_t *__restrict __s, size_t __n,
__const wchar_t *__restrict __format,
__gnuc_va_list __arg)
__THROW ;
/* Read formatted input from STREAM.  */
extern int fwscanf (__FILE *__restrict __stream,
__const wchar_t *__restrict __format, ...)
;
/* Read formatted input from stdin.  */
extern int wscanf (__const wchar_t *__restrict __format, ...)
;
/* Read formatted input from the wide string S.  */
extern int swscanf (__const wchar_t *__restrict __s,
__const wchar_t *__restrict __format, ...)
__THROW ;
# if defined __USE_ISOC99 && !defined __USE_GNU \
&& (!defined __LDBL_COMPAT || !defined __REDIRECT) \
&& (defined __STRICT_ANSI__ || defined __USE_XOPEN2K)
# ifdef __REDIRECT
extern int __REDIRECT (fwscanf, (__FILE *__restrict __stream,
__const wchar_t *__restrict __format, ...),
__isoc99_fwscanf)
;
extern int __REDIRECT (wscanf, (__const wchar_t *__restrict __format, ...),
__isoc99_wscanf)
;
extern int __REDIRECT_NTH (swscanf, (__const wchar_t *__restrict __s,
__const wchar_t *__restrict __format,
...), __isoc99_swscanf)
;
# else
extern int __isoc99_fwscanf (__FILE *__restrict __stream,
__const wchar_t *__restrict __format, ...);
extern int __isoc99_wscanf (__const wchar_t *__restrict __format, ...);
extern int __isoc99_swscanf (__const wchar_t *__restrict __s,
__const wchar_t *__restrict __format, ...)