#if defined(_WIN32)
/*NOTE(review): the header names below were missing from the reviewed text and
   have been restored from the identifiers this file uses (malloc, wcslen,
   stdin/stdout, _setmode/_fileno, _O_BINARY, HMODULE) -- confirm.*/
# include <stdio.h>
# include <stdlib.h>
# include <wchar.h>
/*We need the following two to set stdin/stdout to binary.*/
# include <io.h>
# include <fcntl.h>
# define WIN32_LEAN_AND_MEAN
# define WIN32_EXTRA_LEAN
# include <windows.h>
# include "win32utf8.h"

/*Converts a NUL-terminated UTF-16 string to a newly allocated, NUL-terminated
   UTF-8 string.
  _src: The UTF-16 string to convert.
  Return: A buffer obtained from malloc() that the caller must free(), or NULL
   if _src is NULL, the required size would overflow size_t, or the allocation
   fails.
  Unpaired surrogates and other non-characters are not rejected; see the
   comment inside the loop.*/
static char *utf16_to_utf8(const wchar_t *_src){
  char   *dst;
  size_t  len;
  size_t  si;
  size_t  di;
  if(_src==NULL)return NULL;
  len=wcslen(_src);
  /*Each UTF-16 code unit expands to at most 3 bytes (a surrogate pair is two
     units and expands to only 4), so 3*len+1 bytes always suffice.
    Refuse lengths for which that computation would wrap around.*/
  if(len>(((size_t)-1)-1)/3)return NULL;
  dst=(char *)malloc(sizeof(*dst)*(3*len+1));
  if(dst==NULL)return dst;
  for(di=si=0;si<len;si++){
    unsigned c0;
    c0=_src[si];
    if(c0<0x80){
      /*Can be represented by a 1-byte sequence.*/
      dst[di++]=(char)c0;
      continue;
    }
    else if(c0<0x800){
      /*Can be represented by a 2-byte sequence.*/
      dst[di++]=(char)(0xC0|(c0>>6));
      dst[di++]=(char)(0x80|(c0&0x3F));
      continue;
    }
    else if(c0>=0xD800&&c0<0xDC00){
      unsigned c1;
      /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
      c1=_src[si+1];
      if(c1>=0xDC00&&c1<0xE000){
        unsigned w;
        /*Surrogate pair.*/
        w=(((c0&0x3FF)<<10)|(c1&0x3FF))+0x10000;
        /*Can be represented by a 4-byte sequence.*/
        dst[di++]=(char)(0xF0|(w>>18));
        dst[di++]=(char)(0x80|((w>>12)&0x3F));
        dst[di++]=(char)(0x80|((w>>6)&0x3F));
        dst[di++]=(char)(0x80|(w&0x3F));
        /*Skip the low surrogate we just consumed.*/
        si++;
        continue;
      }
    }
    /*Anything else is either a valid 3-byte sequence, an invalid surrogate
       pair, or 'not a character'.
      In the latter two cases, we just encode the value as a 3-byte sequence
       anyway (producing technically invalid UTF-8).
      Later error handling will detect the problem, with a better chance of
       giving a useful error message.*/
    dst[di++]=(char)(0xE0|(c0>>12));
    dst[di++]=(char)(0x80|((c0>>6)&0x3F));
    dst[di++]=(char)(0x80|(c0&0x3F));
  }
  dst[di++]='\0';
  return dst;
}

typedef LPWSTR *(APIENTRY *command_line_to_argv_w_func)(LPCWSTR cmd_line,
 int *num_args);

/*Make a best-effort attempt to support UTF-8 on Windows.*/
void win32_utf8_setup(int *_argc,const char ***_argv){
  HMODULE hlib;
  /*We need to set stdin/stdout to binary mode.
This is unrelated to UTF-8 support, but it's platform specific and we need to do it in the same places.*/ _setmode(_fileno(stdin),_O_BINARY); _setmode(_fileno(stdout),_O_BINARY); hlib=LoadLibraryA("shell32.dll"); if(hlib!=NULL){ command_line_to_argv_w_func command_line_to_argv_w; /*This function is only available on Windows 2000 or later.*/ command_line_to_argv_w=(command_line_to_argv_w_func)GetProcAddress(hlib, "CommandLineToArgvW"); if(command_line_to_argv_w!=NULL){ wchar_t **argvw; int argc; argvw=(*command_line_to_argv_w)(GetCommandLineW(),&argc); if(argvw!=NULL){ int ai; /*Really, I don't see why argc would ever differ from *_argc, but let's be paranoid.*/ if(argc>*_argc)argc=*_argc; for(ai=0;ai