xsum_bench.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. /*
  2. * xsum_bench - Benchmark functions for xxhsum
  3. * Copyright (C) 2013-2021 Yann Collet
  4. *
  5. * GPL v2 License
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20. *
  21. * You can contact the author at:
  22. * - xxHash homepage: https://www.xxhash.com
  23. * - xxHash source repository: https://github.com/Cyan4973/xxHash
  24. */
  25. #include "xsum_output.h" /* XSUM_logLevel */
  26. #include "xsum_bench.h"
  27. #include "xsum_sanity_check.h" /* XSUM_fillTestBuffer */
  28. #include "xsum_os_specific.h" /* XSUM_getFileSize */
  29. #ifndef XXH_STATIC_LINKING_ONLY
  30. # define XXH_STATIC_LINKING_ONLY
  31. #endif
  32. #include "../xxhash.h"
  33. #ifdef XXHSUM_DISPATCH
  34. # include "../xxh_x86dispatch.h" /* activate _dispatch() redirectors */
  35. #endif
  36. #include <stdlib.h> /* malloc, free */
  37. #include <assert.h>
  38. #include <string.h> /* strlen, memcpy */
  39. #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
  40. #include <errno.h> /* errno */
  41. #define TIMELOOP_S 1
  42. #define TIMELOOP (TIMELOOP_S * CLOCKS_PER_SEC) /* target timing per iteration */
  43. #define TIMELOOP_MIN (TIMELOOP / 2) /* minimum timing to validate a result */
  44. /* Each benchmark iteration attempts to match TIMELOOP (1 second).
  45. * The nb of loops is adjusted at each iteration to reach that target.
  46. * However, initially, there is no information, so 1st iteration blindly targets an arbitrary speed.
  47. * If it's too small, it will be adjusted, and a new attempt will be made.
  48. * But if it's too large, the first iteration can be very long,
  49. * before being fixed at second attempt.
  50. * So prefer starting with small speed targets.
  51. * XXH_1ST_SPEED_TARGET is defined in MB/s */
  52. #ifndef XXH_1ST_SPEED_TARGET
  53. # define XXH_1ST_SPEED_TARGET 10
  54. #endif
  55. #define MAX_MEM (2 GB - 64 MB)
  56. static clock_t XSUM_clockSpan( clock_t start )
  57. {
  58. return clock() - start; /* works even if overflow; Typical max span ~ 30 mn */
  59. }
  60. static size_t XSUM_findMaxMem(XSUM_U64 requiredMem)
  61. {
  62. size_t const step = 64 MB;
  63. void* testmem = NULL;
  64. requiredMem = (((requiredMem >> 26) + 1) << 26);
  65. requiredMem += 2*step;
  66. if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
  67. while (!testmem) {
  68. if (requiredMem > step) requiredMem -= step;
  69. else requiredMem >>= 1;
  70. testmem = malloc ((size_t)requiredMem);
  71. }
  72. free (testmem);
  73. /* keep some space available */
  74. if (requiredMem > step) requiredMem -= step;
  75. else requiredMem >>= 1;
  76. return (size_t)requiredMem;
  77. }
  78. /*
  79. * A secret buffer used for benchmarking XXH3's withSecret variants.
  80. *
  81. * In order for the bench to be realistic, the secret buffer would need to be
  82. * pre-generated.
  83. *
  84. * Adding a pointer to the parameter list would be messy.
  85. */
  86. static XSUM_U8 g_benchSecretBuf[XXH3_SECRET_SIZE_MIN];
  87. /*
  88. * Wrappers for the benchmark.
  89. *
  90. * If you would like to add other hashes to the bench, create a wrapper and add
  91. * it to the g_hashesToBench table. It will automatically be added.
  92. */
  93. typedef XSUM_U32 (*hashFunction)(const void* buffer, size_t bufferSize, XSUM_U32 seed);
  94. static XSUM_U32 localXXH32(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  95. {
  96. return XXH32(buffer, bufferSize, seed);
  97. }
  98. static XSUM_U32 localXXH32_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  99. {
  100. XXH32_state_t state;
  101. (void)seed;
  102. XXH32_reset(&state, seed);
  103. XXH32_update(&state, buffer, bufferSize);
  104. return (XSUM_U32)XXH32_digest(&state);
  105. }
  106. static XSUM_U32 localXXH64(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  107. {
  108. return (XSUM_U32)XXH64(buffer, bufferSize, seed);
  109. }
  110. static XSUM_U32 localXXH64_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  111. {
  112. XXH64_state_t state;
  113. (void)seed;
  114. XXH64_reset(&state, seed);
  115. XXH64_update(&state, buffer, bufferSize);
  116. return (XSUM_U32)XXH64_digest(&state);
  117. }
  118. static XSUM_U32 localXXH3_64b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  119. {
  120. (void)seed;
  121. return (XSUM_U32)XXH3_64bits(buffer, bufferSize);
  122. }
  123. static XSUM_U32 localXXH3_64b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  124. {
  125. return (XSUM_U32)XXH3_64bits_withSeed(buffer, bufferSize, seed);
  126. }
  127. static XSUM_U32 localXXH3_64b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  128. {
  129. (void)seed;
  130. return (XSUM_U32)XXH3_64bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
  131. }
  132. static XSUM_U32 localXXH3_128b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  133. {
  134. (void)seed;
  135. return (XSUM_U32)(XXH3_128bits(buffer, bufferSize).low64);
  136. }
  137. static XSUM_U32 localXXH3_128b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  138. {
  139. return (XSUM_U32)(XXH3_128bits_withSeed(buffer, bufferSize, seed).low64);
  140. }
  141. static XSUM_U32 localXXH3_128b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  142. {
  143. (void)seed;
  144. return (XSUM_U32)(XXH3_128bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf)).low64);
  145. }
  146. static XSUM_U32 localXXH3_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  147. {
  148. XXH3_state_t state;
  149. (void)seed;
  150. XXH3_64bits_reset(&state);
  151. XXH3_64bits_update(&state, buffer, bufferSize);
  152. return (XSUM_U32)XXH3_64bits_digest(&state);
  153. }
  154. static XSUM_U32 localXXH3_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  155. {
  156. XXH3_state_t state;
  157. XXH3_INITSTATE(&state);
  158. XXH3_64bits_reset_withSeed(&state, (XXH64_hash_t)seed);
  159. XXH3_64bits_update(&state, buffer, bufferSize);
  160. return (XSUM_U32)XXH3_64bits_digest(&state);
  161. }
  162. static XSUM_U32 localXXH128_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  163. {
  164. XXH3_state_t state;
  165. (void)seed;
  166. XXH3_128bits_reset(&state);
  167. XXH3_128bits_update(&state, buffer, bufferSize);
  168. return (XSUM_U32)(XXH3_128bits_digest(&state).low64);
  169. }
  170. static XSUM_U32 localXXH128_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
  171. {
  172. XXH3_state_t state;
  173. XXH3_INITSTATE(&state);
  174. XXH3_128bits_reset_withSeed(&state, (XXH64_hash_t)seed);
  175. XXH3_128bits_update(&state, buffer, bufferSize);
  176. return (XSUM_U32)(XXH3_128bits_digest(&state).low64);
  177. }
  178. typedef struct {
  179. const char* name;
  180. hashFunction func;
  181. } hashInfo;
  182. static const hashInfo g_hashesToBench[] = {
  183. { "XXH32", &localXXH32 },
  184. { "XXH64", &localXXH64 },
  185. { "XXH3_64b", &localXXH3_64b },
  186. { "XXH3_64b w/seed", &localXXH3_64b_seeded },
  187. { "XXH3_64b w/secret", &localXXH3_64b_secret },
  188. { "XXH128", &localXXH3_128b },
  189. { "XXH128 w/seed", &localXXH3_128b_seeded },
  190. { "XXH128 w/secret", &localXXH3_128b_secret },
  191. { "XXH32_stream", &localXXH32_stream },
  192. { "XXH64_stream", &localXXH64_stream },
  193. { "XXH3_stream", &localXXH3_stream },
  194. { "XXH3_stream w/seed",&localXXH3_stream_seeded },
  195. { "XXH128_stream", &localXXH128_stream },
  196. { "XXH128_stream w/seed",&localXXH128_stream_seeded },
  197. };
  198. #define NB_HASHFUNC (sizeof(g_hashesToBench) / sizeof(*g_hashesToBench))
  199. #define NB_TESTFUNC (1 + 2 * NB_HASHFUNC)
  200. int const g_nbTestFunctions = NB_TESTFUNC;
  201. char g_testIDs[NB_TESTFUNC] = { 0 };
  202. const char k_testIDs_default[NB_TESTFUNC] = { 0,
  203. 1 /*XXH32*/, 0,
  204. 1 /*XXH64*/, 0,
  205. 1 /*XXH3*/, 0, 0, 0, 0, 0,
  206. 1 /*XXH128*/ };
  207. int g_nbIterations = NBLOOPS_DEFAULT;
  208. #define HASHNAME_MAX 29
  209. static void XSUM_benchHash(hashFunction h, const char* hName, int testID,
  210. const void* buffer, size_t bufferSize)
  211. {
  212. XSUM_U32 nbh_perIteration = (XSUM_U32)((XXH_1ST_SPEED_TARGET MB) / (bufferSize+1)) + 1;
  213. int iterationNb, nbIterations = g_nbIterations + !g_nbIterations /* min 1 */;
  214. double fastestH = 100000000.;
  215. assert(HASHNAME_MAX > 2);
  216. XSUM_logVerbose(2, "\r%80s\r", ""); /* Clean display line */
  217. for (iterationNb = 1; iterationNb <= nbIterations; iterationNb++) {
  218. XSUM_U32 r=0;
  219. clock_t cStart;
  220. XSUM_logVerbose(2, "%2i-%-*.*s : %10u ->\r",
  221. iterationNb,
  222. HASHNAME_MAX, HASHNAME_MAX, hName,
  223. (unsigned)bufferSize);
  224. cStart = clock();
  225. while (clock() == cStart); /* starts clock() at its exact beginning */
  226. cStart = clock();
  227. { XSUM_U32 u;
  228. for (u=0; u<nbh_perIteration; u++)
  229. r += h(buffer, bufferSize, u);
  230. }
  231. if (r==0) XSUM_logVerbose(3,".\r"); /* do something with r to defeat compiler "optimizing" hash away */
  232. { clock_t const nbTicks = XSUM_clockSpan(cStart);
  233. double const ticksPerHash = ((double)nbTicks / TIMELOOP) / nbh_perIteration;
  234. /*
  235. * clock() is the only decent portable timer, but it isn't very
  236. * precise.
  237. *
  238. * Sometimes, this lack of precision is enough that the benchmark
  239. * finishes before there are enough ticks to get a meaningful result.
  240. *
  241. * For example, on a Core 2 Duo (without any sort of Turbo Boost),
  242. * the imprecise timer caused peculiar results like so:
  243. *
  244. * XXH3_64b 4800.0 MB/s // conveniently even
  245. * XXH3_64b unaligned 4800.0 MB/s
  246. * XXH3_64b seeded 9600.0 MB/s // magical 2x speedup?!
  247. * XXH3_64b seeded unaligned 4800.0 MB/s
  248. *
  249. * If we sense a suspiciously low number of ticks, we increase the
  250. * iterations until we can get something meaningful.
  251. */
  252. if (nbTicks < TIMELOOP_MIN) {
  253. /* Not enough time spent in benchmarking, risk of rounding bias */
  254. if (nbTicks == 0) { /* faster than resolution timer */
  255. nbh_perIteration *= 100;
  256. } else {
  257. /*
  258. * update nbh_perIteration so that the next round lasts
  259. * approximately 1 second.
  260. */
  261. double nbh_perSecond = (1 / ticksPerHash) + 1;
  262. if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20); /* avoid overflow */
  263. nbh_perIteration = (XSUM_U32)nbh_perSecond;
  264. }
  265. /* g_nbIterations==0 => quick evaluation, no claim of accuracy */
  266. if (g_nbIterations>0) {
  267. iterationNb--; /* new round for a more accurate speed evaluation */
  268. continue;
  269. }
  270. }
  271. if (ticksPerHash < fastestH) fastestH = ticksPerHash;
  272. if (fastestH>0.) { /* avoid div by zero */
  273. XSUM_logVerbose(2, "%2i-%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
  274. iterationNb,
  275. HASHNAME_MAX, HASHNAME_MAX, hName,
  276. (unsigned)bufferSize,
  277. (double)1 / fastestH,
  278. ((double)bufferSize / (1 MB)) / fastestH);
  279. } }
  280. { double nbh_perSecond = (1 / fastestH) + 1;
  281. if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20); /* avoid overflow */
  282. nbh_perIteration = (XSUM_U32)nbh_perSecond;
  283. }
  284. }
  285. XSUM_logVerbose(1, "%2i#%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \n",
  286. testID,
  287. HASHNAME_MAX, HASHNAME_MAX, hName,
  288. (unsigned)bufferSize,
  289. (double)1 / fastestH,
  290. ((double)bufferSize / (1 MB)) / fastestH);
  291. if (XSUM_logLevel<1)
  292. XSUM_logVerbose(0, "%u, ", (unsigned)((double)1 / fastestH));
  293. }
  294. /*
  295. * Allocates a string containing s1 and s2 concatenated. Acts like strdup.
  296. * The result must be freed.
  297. */
  298. static char* XSUM_strcatDup(const char* s1, const char* s2)
  299. {
  300. assert(s1 != NULL);
  301. assert(s2 != NULL);
  302. { size_t len1 = strlen(s1);
  303. size_t len2 = strlen(s2);
  304. char* buf = (char*)malloc(len1 + len2 + 1);
  305. if (buf != NULL) {
  306. /* strcpy(buf, s1) */
  307. memcpy(buf, s1, len1);
  308. /* strcat(buf, s2) */
  309. memcpy(buf + len1, s2, len2 + 1);
  310. }
  311. return buf;
  312. }
  313. }
  314. /*!
  315. * XSUM_benchMem():
  316. * buffer: Must be 16-byte aligned.
  317. * The real allocated size of buffer is supposed to be >= (bufferSize+3).
  318. * returns: 0 on success, 1 if error (invalid mode selected)
  319. */
  320. static void XSUM_benchMem(const void* buffer, size_t bufferSize)
  321. {
  322. assert((((size_t)buffer) & 15) == 0); /* ensure alignment */
  323. XSUM_fillTestBuffer(g_benchSecretBuf, sizeof(g_benchSecretBuf));
  324. { int i;
  325. for (i = 1; i < (int)NB_TESTFUNC; i++) {
  326. int const hashFuncID = (i-1) / 2;
  327. assert(g_hashesToBench[hashFuncID].name != NULL);
  328. if (g_testIDs[i] == 0) continue;
  329. /* aligned */
  330. if ((i % 2) == 1) {
  331. XSUM_benchHash(g_hashesToBench[hashFuncID].func, g_hashesToBench[hashFuncID].name, i, buffer, bufferSize);
  332. }
  333. /* unaligned */
  334. if ((i % 2) == 0) {
  335. /* Append "unaligned". */
  336. char* const hashNameBuf = XSUM_strcatDup(g_hashesToBench[hashFuncID].name, " unaligned");
  337. assert(hashNameBuf != NULL);
  338. XSUM_benchHash(g_hashesToBench[hashFuncID].func, hashNameBuf, i, ((const char*)buffer)+3, bufferSize);
  339. free(hashNameBuf);
  340. }
  341. } }
  342. }
  343. static size_t XSUM_selectBenchedSize(const char* fileName)
  344. {
  345. XSUM_U64 const inFileSize = XSUM_getFileSize(fileName);
  346. size_t benchedSize = (size_t) XSUM_findMaxMem(inFileSize);
  347. if ((XSUM_U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
  348. if (benchedSize < inFileSize) {
  349. XSUM_log("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
  350. }
  351. return benchedSize;
  352. }
  353. int XSUM_benchFiles(const char* fileNamesTable[], int nbFiles)
  354. {
  355. int fileIdx;
  356. for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
  357. const char* const inFileName = fileNamesTable[fileIdx];
  358. assert(inFileName != NULL);
  359. { FILE* const inFile = XSUM_fopen( inFileName, "rb" );
  360. size_t const benchedSize = XSUM_selectBenchedSize(inFileName);
  361. char* const buffer = (char*)calloc(benchedSize+16+3, 1);
  362. void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes */
  363. /* Checks */
  364. if (inFile==NULL){
  365. XSUM_log("Error: Could not open '%s': %s.\n", inFileName, strerror(errno));
  366. free(buffer);
  367. exit(11);
  368. }
  369. if(!buffer) {
  370. XSUM_log("\nError: Out of memory.\n");
  371. fclose(inFile);
  372. exit(12);
  373. }
  374. /* Fill input buffer */
  375. { size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
  376. fclose(inFile);
  377. if(readSize != benchedSize) {
  378. XSUM_log("\nError: Could not read '%s': %s.\n", inFileName, strerror(errno));
  379. free(buffer);
  380. exit(13);
  381. } }
  382. /* bench */
  383. XSUM_benchMem(alignedBuffer, benchedSize);
  384. free(buffer);
  385. } }
  386. return 0;
  387. }
  388. int XSUM_benchInternal(size_t keySize)
  389. {
  390. void* const buffer = calloc(keySize+16+3, 1);
  391. if (buffer == NULL) {
  392. XSUM_log("\nError: Out of memory.\n");
  393. exit(12);
  394. }
  395. { const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
  396. /* bench */
  397. XSUM_logVerbose(1, "Sample of ");
  398. if (keySize > 10 KB) {
  399. XSUM_logVerbose(1, "%u KB", (unsigned)(keySize >> 10));
  400. } else {
  401. XSUM_logVerbose(1, "%u bytes", (unsigned)keySize);
  402. }
  403. XSUM_logVerbose(1, "... \n");
  404. XSUM_benchMem(alignedBuffer, keySize);
  405. free(buffer);
  406. }
  407. return 0;
  408. }