xsum_os_specific.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. /*
  2. * xxhsum - Command line interface for xxhash algorithms
  3. * Copyright (C) 2013-2021 Yann Collet
  4. *
  5. * GPL v2 License
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20. *
  21. * You can contact the author at:
  22. * - xxHash homepage: https://www.xxhash.com
  23. * - xxHash source repository: https://github.com/Cyan4973/xxHash
  24. */
  25. #include "xsum_os_specific.h" /* XSUM_API */
  26. #include <sys/stat.h> /* stat() / _stat64() */
  27. /*
  28. * This file contains all of the ugly boilerplate to make xxhsum work across
  29. * platforms.
  30. */
  /*
   * XSUM_stat_t is the buffer type handed to XSUM_stat().
   *
   * MSVC builds and Windows wide-char builds use `struct __stat64` (the type
   * filled in by _stat64() / _wstat64()); everything else uses `struct stat`.
   */
  #if defined(_MSC_VER) || XSUM_WIN32_USE_WCHAR
  typedef struct __stat64 XSUM_stat_t;
  #else
  typedef struct stat XSUM_stat_t;
  #endif
  /* mode_t is supplied here for MSVC builds (presumably its headers lack it). */
  #if defined(_MSC_VER)
  typedef int mode_t;
  #endif
  /*
   * XSUM_IS_CONSOLE(stdStream): evaluates to non-zero when the given FILE* is
   * attached to a terminal/console. One implementation is picked per platform;
   * platforms with no known way to ask always report 0.
   */
  #if defined(__EMSCRIPTEN__) && defined(XSUM_NODE_JS)
  # include <unistd.h> /* isatty */
  # include <emscripten.h> /* EM_ASM_INT */
  /*
   * The Emscripten SDK does not properly detect when the standard streams
   * are piped to node.js, and there does not seem to be any way to tell in
   * plain C. As a workaround, inline JavaScript asks Node's own isatty().
   */
  static int XSUM_IS_CONSOLE(FILE* stdStream)
  {
      int const fd = fileno(stdStream);
      /* Node detection trick from https://github.com/iliakan/detect-node */
      int const running_on_node = EM_ASM_INT((
          return (Object.prototype.toString.call(
              typeof process !== 'undefined' ? process : 0
          ) == '[object process]') | 0
      ));
      if (!running_on_node) {
          /* Not under Node: the regular libc answer is used. */
          return isatty(fd);
      }
      return EM_ASM_INT(
          return require('node:tty').isatty($0),
          fd
      );
  }
  #elif defined(__EMSCRIPTEN__) || (defined(__linux__) && (XSUM_PLATFORM_POSIX_VERSION >= 1)) \
   || (XSUM_PLATFORM_POSIX_VERSION >= 200112L) \
   || defined(__DJGPP__) \
   || defined(__MSYS__) \
   || defined(__HAIKU__)
  # include <unistd.h> /* isatty */
  # define XSUM_IS_CONSOLE(stdStream) isatty(fileno(stdStream))
  #elif defined(MSDOS) || defined(OS2)
  # include <io.h> /* _isatty */
  # define XSUM_IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
  #elif defined(WIN32) || defined(_WIN32)
  # include <io.h> /* _isatty */
  # include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
  # include <stdio.h> /* FILE */
  /*
   * Windows: the stream must pass _isatty() AND its OS handle must accept
   * GetConsoleMode() before it is reported as a console.
   */
  static __inline int XSUM_IS_CONSOLE(FILE* stdStream)
  {
      int const fd = _fileno(stdStream);
      DWORD consoleMode;
      if (!_isatty(fd)) {
          return 0;
      }
      return GetConsoleMode((HANDLE)_get_osfhandle(fd), &consoleMode) != 0;
  }
  #else
  # define XSUM_IS_CONSOLE(stdStream) 0
  #endif
  /*
   * XSUM_SET_BINARY_MODE(file): switches the given FILE* to binary mode on
   * DOS/OS2/Windows targets and is a no-op everywhere else.
   *
   * Every variant is a single expression so that `XSUM_SET_BINARY_MODE(f);`
   * is exactly one statement and stays safe inside an unbraced if/else.
   * The result of _setmode() is deliberately discarded.
   */
  #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
  # include <fcntl.h> /* _O_BINARY */
  # include <io.h> /* _setmode, _fileno, _get_osfhandle */
  # if !defined(__DJGPP__)
  # include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
  # include <winioctl.h> /* FSCTL_SET_SPARSE */
  # define XSUM_SET_BINARY_MODE(file) ((void)_setmode(_fileno(file), _O_BINARY))
  # else
  # define XSUM_SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
  # endif
  #else
  # define XSUM_SET_BINARY_MODE(file) ((void)(file))
  #endif
  98. XSUM_API int XSUM_isConsole(FILE* stream)
  99. {
  100. return XSUM_IS_CONSOLE(stream);
  101. }
  102. XSUM_API void XSUM_setBinaryMode(FILE* stream)
  103. {
  104. XSUM_SET_BINARY_MODE(stream);
  105. }
  106. #if !XSUM_WIN32_USE_WCHAR
  107. XSUM_API FILE* XSUM_fopen(const char* filename, const char* mode)
  108. {
  109. return fopen(filename, mode);
  110. }
  111. XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
  112. XSUM_API int XSUM_vfprintf(FILE* stream, const char* format, va_list ap)
  113. {
  114. return vfprintf(stream, format, ap);
  115. }
  116. static int XSUM_stat(const char* infilename, XSUM_stat_t* statbuf)
  117. {
  118. #if defined(_MSC_VER)
  119. return _stat64(infilename, statbuf);
  120. #else
  121. return stat(infilename, statbuf);
  122. #endif
  123. }
  #ifndef XSUM_NO_MAIN
  /*
   * Program entry point for builds without wide-char argument handling:
   * the arguments are forwarded to XSUM_main() untouched.
   */
  int main(int argc, const char* argv[])
  {
      int const exitCode = XSUM_main(argc, argv);
      return exitCode;
  }
  #endif
  130. /* Unicode helpers for Windows to make UTF-8 act as it should. */
  131. #else
  132. # include <windows.h>
  133. # include <wchar.h>
  134. /*****************************************************************************
  135. * Unicode conversion tools
  136. *****************************************************************************/
  137. /*
  138. * Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards.
  139. * This version allows keeping the output length.
  140. */
  141. static wchar_t* XSUM_widenString(const char* str, int* lenOut)
  142. {
  143. int const len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
  144. if (lenOut != NULL) *lenOut = len;
  145. if (len == 0) return NULL;
  146. { wchar_t* buf = (wchar_t*)malloc((size_t)len * sizeof(wchar_t));
  147. if (buf != NULL) {
  148. if (MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len) == 0) {
  149. free(buf);
  150. return NULL;
  151. } }
  152. return buf;
  153. }
  154. }
  155. /*
  156. * Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards.
  157. * This version allows keeping the output length.
  158. */
  159. static char* XSUM_narrowString(const wchar_t *str, int *lenOut)
  160. {
  161. int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
  162. if (lenOut != NULL) *lenOut = len;
  163. if (len == 0) return NULL;
  164. { char* const buf = (char*)malloc((size_t)len * sizeof(char));
  165. if (buf != NULL) {
  166. if (WideCharToMultiByte(CP_UTF8, 0, str, -1, buf, len, NULL, NULL) == 0) {
  167. free(buf);
  168. return NULL;
  169. } }
  170. return buf;
  171. }
  172. }
  173. /*****************************************************************************
  174. * File helpers
  175. *****************************************************************************/
  176. /*
  177. * fopen wrapper that supports UTF-8
  178. *
  179. * fopen will only accept ANSI filenames, which means that we can't open Unicode filenames.
  180. *
  181. * In order to open a Unicode filename, we need to convert filenames to UTF-16 and use _wfopen.
  182. */
  183. XSUM_API FILE* XSUM_fopen(const char* filename, const char* mode)
  184. {
  185. FILE* f = NULL;
  186. wchar_t* const wide_filename = XSUM_widenString(filename, NULL);
  187. if (wide_filename != NULL) {
  188. wchar_t* const wide_mode = XSUM_widenString(mode, NULL);
  189. if (wide_mode != NULL) {
  190. f = _wfopen(wide_filename, wide_mode);
  191. free(wide_mode);
  192. }
  193. free(wide_filename);
  194. }
  195. return f;
  196. }
  197. /*
  198. * stat() wrapper which supports UTF-8 filenames.
  199. */
  200. static int XSUM_stat(const char* infilename, XSUM_stat_t* statbuf)
  201. {
  202. int r = -1;
  203. wchar_t* const wide_filename = XSUM_widenString(infilename, NULL);
  204. if (wide_filename != NULL) {
  205. r = _wstat64(wide_filename, statbuf);
  206. free(wide_filename);
  207. }
  208. return r;
  209. }
  /*
   * XSUM_va_copy(dst, src): va_copy() where the toolchain provides it.
   *
   * For MSVC (non-clang) without a va_copy macro, fall back to plain
   * assignment, which is what MSVC 2019 defines in stdarg.h.
   */
  #if !defined(_MSC_VER) || defined(__clang__) || defined(va_copy)
  # define XSUM_va_copy(dst, src) va_copy(dst, src)
  #else
  # define XSUM_va_copy(dst, src) ((dst) = (src))
  #endif
  218. /*
  219. * vasprintf for Windows.
  220. */
  221. XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
  222. static int XSUM_vasprintf(char** strp, const char* format, va_list ap)
  223. {
  224. int size;
  225. va_list copy;
  226. /*
  227. * To be safe, make a va_copy.
  228. *
  229. * Note that Microsoft doesn't use va_copy in its sample code:
  230. * https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/vsprintf-vsprintf-l-vswprintf-vswprintf-l-vswprintf-l?view=vs-2019
  231. */
  232. XSUM_va_copy(copy, ap);
  233. /* Calculate how many characters we need */
  234. size = _vscprintf(format, ap);
  235. va_end(copy);
  236. if (size < 0) {
  237. *strp = NULL;
  238. return size;
  239. } else {
  240. int ret;
  241. *strp = (char*) malloc((size_t)size + 1);
  242. if (*strp == NULL) {
  243. return -1;
  244. }
  245. /* vsprintf into the new buffer */
  246. ret = vsprintf(*strp, format, ap);
  247. if (ret < 0) {
  248. free(*strp);
  249. *strp = NULL;
  250. }
  251. return ret;
  252. }
  253. }
  254. /*
  255. * fprintf wrapper that supports UTF-8.
  256. *
  257. * fprintf doesn't properly handle Unicode on Windows.
  258. *
  259. * Additionally, it is codepage sensitive on console and may crash the program.
  260. *
  261. * Instead, we use vsnprintf, and either print with fwrite or convert to UTF-16
  262. * for console output and use the codepage-independent WriteConsoleW.
  263. *
  264. * Credit to t-mat: https://github.com/t-mat/xxHash/commit/5691423
  265. */
  266. XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
  267. XSUM_API int XSUM_vfprintf(FILE *stream, const char *format, va_list ap)
  268. {
  269. int result;
  270. char* u8_str = NULL;
  271. /*
  272. * Generate the UTF-8 output string with vasprintf.
  273. */
  274. result = XSUM_vasprintf(&u8_str, format, ap);
  275. if (result >= 0) {
  276. const size_t nchar = (size_t)result + 1;
  277. /*
  278. * Check if we are outputting to a console. Don't use XSUM_isConsole
  279. * directly -- we don't need to call _get_osfhandle twice.
  280. */
  281. int fileNb = _fileno(stream);
  282. intptr_t handle_raw = _get_osfhandle(fileNb);
  283. HANDLE handle = (HANDLE)handle_raw;
  284. DWORD dwTemp;
  285. if (handle_raw < 0) {
  286. result = -1;
  287. } else if (_isatty(fileNb) && GetConsoleMode(handle, &dwTemp)) {
  288. /*
  289. * Convert to UTF-16 and output with WriteConsoleW.
  290. *
  291. * This is codepage independent and works on Windows XP's default
  292. * msvcrt.dll.
  293. */
  294. int len;
  295. wchar_t* const u16_buf = XSUM_widenString(u8_str, &len);
  296. if (u16_buf == NULL) {
  297. result = -1;
  298. } else {
  299. if (WriteConsoleW(handle, u16_buf, (DWORD)len - 1, &dwTemp, NULL)) {
  300. result = (int)dwTemp;
  301. } else {
  302. result = -1;
  303. }
  304. free(u16_buf);
  305. }
  306. } else {
  307. /* fwrite the UTF-8 string if we are printing to a file */
  308. result = (int)fwrite(u8_str, 1, nchar - 1, stream);
  309. if (result == 0) {
  310. result = -1;
  311. }
  312. }
  313. free(u8_str);
  314. }
  315. return result;
  316. }
  317. #ifndef XSUM_NO_MAIN
  318. /*****************************************************************************
  319. * Command Line argument parsing
  320. *****************************************************************************/
  /*
   * Converts a UTF-16 argv to UTF-8.
   *
   * Returns a malloc'd array of `argc` malloc'd strings followed by a NULL
   * terminator; release it with XSUM_freeArgv(). Returns NULL when the array
   * cannot be allocated or any single argument fails to convert, in which
   * case everything converted so far is freed first.
   */
  static char** XSUM_convertArgv(int argc, wchar_t* utf16_argv[])
  {
      int idx;
      char** const converted = (char**)malloc((size_t)(argc + 1) * sizeof(char*));

      if (converted == NULL) {
          return NULL;
      }

      for (idx = 0; idx < argc; idx++) {
          converted[idx] = XSUM_narrowString(utf16_argv[idx], NULL);
          if (converted[idx] == NULL) {
              /* Conversion failed (e.g. out of memory): undo the earlier ones. */
              int done;
              for (done = idx - 1; done >= 0; done--) {
                  free(converted[done]);
              }
              free(converted);
              return NULL;
          }
      }

      converted[argc] = NULL;
      return converted;
  }
  /*
   * Frees arguments returned by XSUM_convertArgv: the first `argc` strings,
   * then the array itself. A NULL `argv` is accepted and ignored.
   */
  static void XSUM_freeArgv(int argc, char** argv)
  {
      if (argv != NULL) {
          int idx = 0;
          while (idx < argc) {
              free(argv[idx]);
              idx++;
          }
          free(argv);
      }
  }
  /*
   * Shared body of the wide-char entry points: converts the UTF-16 arguments
   * to UTF-8, runs XSUM_main() on them and returns its result.
   * Aborts the process if the arguments cannot be converted.
   */
  static int XSUM_wmain(int argc, wchar_t* utf16_argv[])
  {
      int exitCode;
      /* Convert the UTF-16 arguments to UTF-8. */
      char** const utf8_argv = XSUM_convertArgv(argc, utf16_argv);

      if (utf8_argv == NULL) {
          /* An unfortunate but incredibly unlikely error. */
          fprintf(stderr, "xxhsum: error converting command line arguments!\n");
          abort();
      }

      /*
       * MinGW's terminal uses full block buffering for stderr.
       *
       * This is nonstandard behavior and causes text to not display until
       * the buffer fills.
       *
       * `setvbuf()` can easily correct this to make text display instantly.
       */
      setvbuf(stderr, NULL, _IONBF, 0);

      /* Call our real main function */
      exitCode = XSUM_main(argc, (void*)utf8_argv);

      /* Cleanup */
      XSUM_freeArgv(argc, utf8_argv);
      return exitCode;
  }
  380. #if XSUM_WIN32_USE_WMAIN
  381. /*
  382. * The preferred method of obtaining the real UTF-16 arguments. Always works
  383. * on MSVC, sometimes works on MinGW-w64 depending on the compiler flags.
  384. */
  385. #ifdef __cplusplus
  386. extern "C"
  387. #endif
  388. int __cdecl wmain(int argc, wchar_t* utf16_argv[])
  389. {
  390. return XSUM_wmain(argc, utf16_argv);
  391. }
  392. #else /* !XSUM_WIN32_USE_WMAIN */
  393. /*
  394. * Wrap `XSUM_wmain()` using `main()` and `__wgetmainargs()` on MinGW without
  395. * Unicode support.
  396. *
  397. * `__wgetmainargs()` is used in the CRT startup to retrieve the arguments for
  398. * `wmain()`, so we use it on MinGW to emulate `wmain()`.
  399. *
  400. * It is an internal function and not declared in any public headers, so we
  401. * have to declare it manually.
  402. *
  403. * An alternative that doesn't mess with internal APIs is `GetCommandLineW()`
  404. * with `CommandLineToArgvW()`, but the former doesn't expand wildcards and the
  405. * latter requires linking to Shell32.dll and its numerous dependencies.
  406. *
  407. * This method keeps our dependencies to kernel32.dll and the CRT.
  408. *
  409. * https://docs.microsoft.com/en-us/cpp/c-runtime-library/getmainargs-wgetmainargs?view=vs-2019
  410. */
  411. typedef struct {
  412. int newmode;
  413. } _startupinfo;
  414. #ifdef __cplusplus
  415. extern "C"
  416. #endif
  417. int __cdecl __wgetmainargs(
  418. int* Argc,
  419. wchar_t*** Argv,
  420. wchar_t*** Env,
  421. int DoWildCard,
  422. _startupinfo* StartInfo
  423. );
  424. int main(int ansi_argc, const char* ansi_argv[])
  425. {
  426. int utf16_argc;
  427. wchar_t** utf16_argv;
  428. wchar_t** utf16_envp; /* Unused but required */
  429. _startupinfo startinfo = {0}; /* 0 == don't change new mode */
  430. /* Get wmain's UTF-16 arguments. Make sure we expand wildcards. */
  431. if (__wgetmainargs(&utf16_argc, &utf16_argv, &utf16_envp, 1, &startinfo) < 0)
  432. /* In the very unlikely case of an error, use the ANSI arguments. */
  433. return XSUM_main(ansi_argc, ansi_argv);
  434. /* Call XSUM_wmain with our UTF-16 arguments */
  435. return XSUM_wmain(utf16_argc, utf16_argv);
  436. }
  437. #endif /* !XSUM_WIN32_USE_WMAIN */
  438. #endif /* !XSUM_NO_MAIN */
  439. #endif /* XSUM_WIN32_USE_WCHAR */
  440. /*
  441. * Determines whether the file at filename is a directory.
  442. */
  443. XSUM_API int XSUM_isDirectory(const char* filename)
  444. {
  445. XSUM_stat_t statbuf;
  446. int r = XSUM_stat(filename, &statbuf);
  447. #ifdef _MSC_VER
  448. if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
  449. #else
  450. if (!r && S_ISDIR(statbuf.st_mode)) return 1;
  451. #endif
  452. return 0;
  453. }
  454. /*
  455. * Returns the filesize of the file at filename.
  456. */
  457. XSUM_API XSUM_U64 XSUM_getFileSize(const char* filename)
  458. {
  459. XSUM_stat_t statbuf;
  460. int r = XSUM_stat(filename, &statbuf);
  461. if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
  462. return (XSUM_U64)statbuf.st_size;
  463. }