| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448 |
- /*
- * xsum_bench - Benchmark functions for xxhsum
- * Copyright (C) 2013-2021 Yann Collet
- *
- * GPL v2 License
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * You can contact the author at:
- * - xxHash homepage: https://www.xxhash.com
- * - xxHash source repository: https://github.com/Cyan4973/xxHash
- */
- #include "xsum_output.h" /* XSUM_logLevel */
- #include "xsum_bench.h"
- #include "xsum_sanity_check.h" /* XSUM_fillTestBuffer */
- #include "xsum_os_specific.h" /* XSUM_getFileSize */
- #ifndef XXH_STATIC_LINKING_ONLY
- # define XXH_STATIC_LINKING_ONLY
- #endif
- #include "../xxhash.h"
- #ifdef XXHSUM_DISPATCH
- # include "../xxh_x86dispatch.h" /* activate _dispatch() redirectors */
- #endif
- #include <stdlib.h> /* malloc, free */
- #include <assert.h>
- #include <string.h> /* strlen, memcpy */
- #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
- #include <errno.h> /* errno */
- #define TIMELOOP_S 1
- #define TIMELOOP (TIMELOOP_S * CLOCKS_PER_SEC) /* target timing per iteration */
- #define TIMELOOP_MIN (TIMELOOP / 2) /* minimum timing to validate a result */
- /* Each benchmark iteration attempts to match TIMELOOP (1 second).
- * The nb of loops is adjusted at each iteration to reach that target.
- * However, initially, there is no information, so 1st iteration blindly targets an arbitrary speed.
- * If it's too small, it will be adjusted, and a new attempt will be made.
- * But if it's too large, the first iteration can be very long,
- * before being fixed at second attempt.
- * So prefer starting with small speed targets.
- * XXH_1ST_SPEED_TARGET is defined in MB/s */
- #ifndef XXH_1ST_SPEED_TARGET
- # define XXH_1ST_SPEED_TARGET 10
- #endif
- #define MAX_MEM (2 GB - 64 MB)
- static clock_t XSUM_clockSpan( clock_t start )
- {
- return clock() - start; /* works even if overflow; Typical max span ~ 30 mn */
- }
- static size_t XSUM_findMaxMem(XSUM_U64 requiredMem)
- {
- size_t const step = 64 MB;
- void* testmem = NULL;
- requiredMem = (((requiredMem >> 26) + 1) << 26);
- requiredMem += 2*step;
- if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
- while (!testmem) {
- if (requiredMem > step) requiredMem -= step;
- else requiredMem >>= 1;
- testmem = malloc ((size_t)requiredMem);
- }
- free (testmem);
- /* keep some space available */
- if (requiredMem > step) requiredMem -= step;
- else requiredMem >>= 1;
- return (size_t)requiredMem;
- }
- /*
- * A secret buffer used for benchmarking XXH3's withSecret variants.
- *
- * In order for the bench to be realistic, the secret buffer would need to be
- * pre-generated.
- *
- * Adding a pointer to the parameter list would be messy.
- */
- static XSUM_U8 g_benchSecretBuf[XXH3_SECRET_SIZE_MIN];
- /*
- * Wrappers for the benchmark.
- *
- * If you would like to add other hashes to the bench, create a wrapper and add
- * it to the g_hashesToBench table. It will automatically be added.
- */
- typedef XSUM_U32 (*hashFunction)(const void* buffer, size_t bufferSize, XSUM_U32 seed);
- static XSUM_U32 localXXH32(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- return XXH32(buffer, bufferSize, seed);
- }
- static XSUM_U32 localXXH32_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- XXH32_state_t state;
- (void)seed;
- XXH32_reset(&state, seed);
- XXH32_update(&state, buffer, bufferSize);
- return (XSUM_U32)XXH32_digest(&state);
- }
- static XSUM_U32 localXXH64(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- return (XSUM_U32)XXH64(buffer, bufferSize, seed);
- }
- static XSUM_U32 localXXH64_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- XXH64_state_t state;
- (void)seed;
- XXH64_reset(&state, seed);
- XXH64_update(&state, buffer, bufferSize);
- return (XSUM_U32)XXH64_digest(&state);
- }
- static XSUM_U32 localXXH3_64b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- (void)seed;
- return (XSUM_U32)XXH3_64bits(buffer, bufferSize);
- }
- static XSUM_U32 localXXH3_64b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- return (XSUM_U32)XXH3_64bits_withSeed(buffer, bufferSize, seed);
- }
- static XSUM_U32 localXXH3_64b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- (void)seed;
- return (XSUM_U32)XXH3_64bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
- }
- static XSUM_U32 localXXH3_128b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- (void)seed;
- return (XSUM_U32)(XXH3_128bits(buffer, bufferSize).low64);
- }
- static XSUM_U32 localXXH3_128b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- return (XSUM_U32)(XXH3_128bits_withSeed(buffer, bufferSize, seed).low64);
- }
- static XSUM_U32 localXXH3_128b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- (void)seed;
- return (XSUM_U32)(XXH3_128bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf)).low64);
- }
- static XSUM_U32 localXXH3_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- XXH3_state_t state;
- (void)seed;
- XXH3_64bits_reset(&state);
- XXH3_64bits_update(&state, buffer, bufferSize);
- return (XSUM_U32)XXH3_64bits_digest(&state);
- }
- static XSUM_U32 localXXH3_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- XXH3_state_t state;
- XXH3_INITSTATE(&state);
- XXH3_64bits_reset_withSeed(&state, (XXH64_hash_t)seed);
- XXH3_64bits_update(&state, buffer, bufferSize);
- return (XSUM_U32)XXH3_64bits_digest(&state);
- }
- static XSUM_U32 localXXH128_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- XXH3_state_t state;
- (void)seed;
- XXH3_128bits_reset(&state);
- XXH3_128bits_update(&state, buffer, bufferSize);
- return (XSUM_U32)(XXH3_128bits_digest(&state).low64);
- }
- static XSUM_U32 localXXH128_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
- {
- XXH3_state_t state;
- XXH3_INITSTATE(&state);
- XXH3_128bits_reset_withSeed(&state, (XXH64_hash_t)seed);
- XXH3_128bits_update(&state, buffer, bufferSize);
- return (XSUM_U32)(XXH3_128bits_digest(&state).low64);
- }
- typedef struct {
- const char* name;
- hashFunction func;
- } hashInfo;
- static const hashInfo g_hashesToBench[] = {
- { "XXH32", &localXXH32 },
- { "XXH64", &localXXH64 },
- { "XXH3_64b", &localXXH3_64b },
- { "XXH3_64b w/seed", &localXXH3_64b_seeded },
- { "XXH3_64b w/secret", &localXXH3_64b_secret },
- { "XXH128", &localXXH3_128b },
- { "XXH128 w/seed", &localXXH3_128b_seeded },
- { "XXH128 w/secret", &localXXH3_128b_secret },
- { "XXH32_stream", &localXXH32_stream },
- { "XXH64_stream", &localXXH64_stream },
- { "XXH3_stream", &localXXH3_stream },
- { "XXH3_stream w/seed",&localXXH3_stream_seeded },
- { "XXH128_stream", &localXXH128_stream },
- { "XXH128_stream w/seed",&localXXH128_stream_seeded },
- };
- #define NB_HASHFUNC (sizeof(g_hashesToBench) / sizeof(*g_hashesToBench))
- #define NB_TESTFUNC (1 + 2 * NB_HASHFUNC)
- int const g_nbTestFunctions = NB_TESTFUNC;
- char g_testIDs[NB_TESTFUNC] = { 0 };
- const char k_testIDs_default[NB_TESTFUNC] = { 0,
- 1 /*XXH32*/, 0,
- 1 /*XXH64*/, 0,
- 1 /*XXH3*/, 0, 0, 0, 0, 0,
- 1 /*XXH128*/ };
- int g_nbIterations = NBLOOPS_DEFAULT;
- #define HASHNAME_MAX 29
- static void XSUM_benchHash(hashFunction h, const char* hName, int testID,
- const void* buffer, size_t bufferSize)
- {
- XSUM_U32 nbh_perIteration = (XSUM_U32)((XXH_1ST_SPEED_TARGET MB) / (bufferSize+1)) + 1;
- int iterationNb, nbIterations = g_nbIterations + !g_nbIterations /* min 1 */;
- double fastestH = 100000000.;
- assert(HASHNAME_MAX > 2);
- XSUM_logVerbose(2, "\r%80s\r", ""); /* Clean display line */
- for (iterationNb = 1; iterationNb <= nbIterations; iterationNb++) {
- XSUM_U32 r=0;
- clock_t cStart;
- XSUM_logVerbose(2, "%2i-%-*.*s : %10u ->\r",
- iterationNb,
- HASHNAME_MAX, HASHNAME_MAX, hName,
- (unsigned)bufferSize);
- cStart = clock();
- while (clock() == cStart); /* starts clock() at its exact beginning */
- cStart = clock();
- { XSUM_U32 u;
- for (u=0; u<nbh_perIteration; u++)
- r += h(buffer, bufferSize, u);
- }
- if (r==0) XSUM_logVerbose(3,".\r"); /* do something with r to defeat compiler "optimizing" hash away */
- { clock_t const nbTicks = XSUM_clockSpan(cStart);
- double const ticksPerHash = ((double)nbTicks / TIMELOOP) / nbh_perIteration;
- /*
- * clock() is the only decent portable timer, but it isn't very
- * precise.
- *
- * Sometimes, this lack of precision is enough that the benchmark
- * finishes before there are enough ticks to get a meaningful result.
- *
- * For example, on a Core 2 Duo (without any sort of Turbo Boost),
- * the imprecise timer caused peculiar results like so:
- *
- * XXH3_64b 4800.0 MB/s // conveniently even
- * XXH3_64b unaligned 4800.0 MB/s
- * XXH3_64b seeded 9600.0 MB/s // magical 2x speedup?!
- * XXH3_64b seeded unaligned 4800.0 MB/s
- *
- * If we sense a suspiciously low number of ticks, we increase the
- * iterations until we can get something meaningful.
- */
- if (nbTicks < TIMELOOP_MIN) {
- /* Not enough time spent in benchmarking, risk of rounding bias */
- if (nbTicks == 0) { /* faster than resolution timer */
- nbh_perIteration *= 100;
- } else {
- /*
- * update nbh_perIteration so that the next round lasts
- * approximately 1 second.
- */
- double nbh_perSecond = (1 / ticksPerHash) + 1;
- if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20); /* avoid overflow */
- nbh_perIteration = (XSUM_U32)nbh_perSecond;
- }
- /* g_nbIterations==0 => quick evaluation, no claim of accuracy */
- if (g_nbIterations>0) {
- iterationNb--; /* new round for a more accurate speed evaluation */
- continue;
- }
- }
- if (ticksPerHash < fastestH) fastestH = ticksPerHash;
- if (fastestH>0.) { /* avoid div by zero */
- XSUM_logVerbose(2, "%2i-%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
- iterationNb,
- HASHNAME_MAX, HASHNAME_MAX, hName,
- (unsigned)bufferSize,
- (double)1 / fastestH,
- ((double)bufferSize / (1 MB)) / fastestH);
- } }
- { double nbh_perSecond = (1 / fastestH) + 1;
- if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20); /* avoid overflow */
- nbh_perIteration = (XSUM_U32)nbh_perSecond;
- }
- }
- XSUM_logVerbose(1, "%2i#%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \n",
- testID,
- HASHNAME_MAX, HASHNAME_MAX, hName,
- (unsigned)bufferSize,
- (double)1 / fastestH,
- ((double)bufferSize / (1 MB)) / fastestH);
- if (XSUM_logLevel<1)
- XSUM_logVerbose(0, "%u, ", (unsigned)((double)1 / fastestH));
- }
- /*
- * Allocates a string containing s1 and s2 concatenated. Acts like strdup.
- * The result must be freed.
- */
- static char* XSUM_strcatDup(const char* s1, const char* s2)
- {
- assert(s1 != NULL);
- assert(s2 != NULL);
- { size_t len1 = strlen(s1);
- size_t len2 = strlen(s2);
- char* buf = (char*)malloc(len1 + len2 + 1);
- if (buf != NULL) {
- /* strcpy(buf, s1) */
- memcpy(buf, s1, len1);
- /* strcat(buf, s2) */
- memcpy(buf + len1, s2, len2 + 1);
- }
- return buf;
- }
- }
- /*!
- * XSUM_benchMem():
- * buffer: Must be 16-byte aligned.
- * The real allocated size of buffer is supposed to be >= (bufferSize+3).
- * returns: 0 on success, 1 if error (invalid mode selected)
- */
- static void XSUM_benchMem(const void* buffer, size_t bufferSize)
- {
- assert((((size_t)buffer) & 15) == 0); /* ensure alignment */
- XSUM_fillTestBuffer(g_benchSecretBuf, sizeof(g_benchSecretBuf));
- { int i;
- for (i = 1; i < (int)NB_TESTFUNC; i++) {
- int const hashFuncID = (i-1) / 2;
- assert(g_hashesToBench[hashFuncID].name != NULL);
- if (g_testIDs[i] == 0) continue;
- /* aligned */
- if ((i % 2) == 1) {
- XSUM_benchHash(g_hashesToBench[hashFuncID].func, g_hashesToBench[hashFuncID].name, i, buffer, bufferSize);
- }
- /* unaligned */
- if ((i % 2) == 0) {
- /* Append "unaligned". */
- char* const hashNameBuf = XSUM_strcatDup(g_hashesToBench[hashFuncID].name, " unaligned");
- assert(hashNameBuf != NULL);
- XSUM_benchHash(g_hashesToBench[hashFuncID].func, hashNameBuf, i, ((const char*)buffer)+3, bufferSize);
- free(hashNameBuf);
- }
- } }
- }
- static size_t XSUM_selectBenchedSize(const char* fileName)
- {
- XSUM_U64 const inFileSize = XSUM_getFileSize(fileName);
- size_t benchedSize = (size_t) XSUM_findMaxMem(inFileSize);
- if ((XSUM_U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
- if (benchedSize < inFileSize) {
- XSUM_log("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
- }
- return benchedSize;
- }
- int XSUM_benchFiles(const char* fileNamesTable[], int nbFiles)
- {
- int fileIdx;
- for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
- const char* const inFileName = fileNamesTable[fileIdx];
- assert(inFileName != NULL);
- { FILE* const inFile = XSUM_fopen( inFileName, "rb" );
- size_t const benchedSize = XSUM_selectBenchedSize(inFileName);
- char* const buffer = (char*)calloc(benchedSize+16+3, 1);
- void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes */
- /* Checks */
- if (inFile==NULL){
- XSUM_log("Error: Could not open '%s': %s.\n", inFileName, strerror(errno));
- free(buffer);
- exit(11);
- }
- if(!buffer) {
- XSUM_log("\nError: Out of memory.\n");
- fclose(inFile);
- exit(12);
- }
- /* Fill input buffer */
- { size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
- fclose(inFile);
- if(readSize != benchedSize) {
- XSUM_log("\nError: Could not read '%s': %s.\n", inFileName, strerror(errno));
- free(buffer);
- exit(13);
- } }
- /* bench */
- XSUM_benchMem(alignedBuffer, benchedSize);
- free(buffer);
- } }
- return 0;
- }
- int XSUM_benchInternal(size_t keySize)
- {
- void* const buffer = calloc(keySize+16+3, 1);
- if (buffer == NULL) {
- XSUM_log("\nError: Out of memory.\n");
- exit(12);
- }
- { const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
- /* bench */
- XSUM_logVerbose(1, "Sample of ");
- if (keySize > 10 KB) {
- XSUM_logVerbose(1, "%u KB", (unsigned)(keySize >> 10));
- } else {
- XSUM_logVerbose(1, "%u bytes", (unsigned)keySize);
- }
- XSUM_logVerbose(1, "... \n");
- XSUM_benchMem(alignedBuffer, keySize);
- free(buffer);
- }
- return 0;
- }
|