xxh_x86dispatch.c 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845
  1. /*
  2. * xxHash - Extremely Fast Hash algorithm
  3. * Copyright (C) 2020-2021 Yann Collet
  4. *
  5. * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are
  9. * met:
  10. *
  11. * * Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above
  14. * copyright notice, this list of conditions and the following disclaimer
  15. * in the documentation and/or other materials provided with the
  16. * distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * You can contact the author at:
  31. * - xxHash homepage: https://www.xxhash.com
  32. * - xxHash source repository: https://github.com/Cyan4973/xxHash
  33. */
  34. /*!
  35. * @file xxh_x86dispatch.c
  36. *
  37. * Automatic dispatcher code for the @ref XXH3_family on x86-based targets.
  38. *
  39. * Optional add-on.
  40. *
  41. * **Compile this file with the default flags for your target.** Do not compile
  42. * with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be
  43. * an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details.
  44. *
  45. * @defgroup dispatch x86 Dispatcher
  46. * @{
  47. */
  48. #if defined (__cplusplus)
  49. extern "C" {
  50. #endif
  51. #if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64))
  52. # error "Dispatching is currently only supported on x86 and x86_64."
  53. #endif
  54. /*! @cond Doxygen ignores this part */
  55. #ifndef XXH_HAS_INCLUDE
  56. # ifdef __has_include
  57. # define XXH_HAS_INCLUDE(x) __has_include(x)
  58. # else
  59. # define XXH_HAS_INCLUDE(x) 0
  60. # endif
  61. #endif
  62. /*! @endcond */
  63. /*!
  64. * @def XXH_X86DISPATCH_ALLOW_AVX
  65. * @brief Disables the AVX sanity check.
  66. *
  67. * xxh_x86dispatch.c is intended to be compiled for the minimum target, and
  68. * it selectively enables SSE2, AVX2, and AVX512 when it is needed.
  69. *
  70. * Compiling with options like `-mavx*`, `-march=native`, or `/arch:AVX*`
  71. * _globally_ will always enable this feature, and therefore makes it
  72. * undefined behavior to execute on any CPU without said feature.
  73. *
  74. * Even if the source code isn't directly using AVX intrinsics in a function,
  75. * the compiler can still generate AVX code from autovectorization and by
  76. * "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128).
  77. *
  78. * Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check,
  79. * thus accepting that the produced binary will not work correctly
  80. * on any CPU with less features than the ones stated at compilation time.
  81. */
  82. #ifdef XXH_DOXYGEN
  83. # define XXH_X86DISPATCH_ALLOW_AVX
  84. #endif
  85. #if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX)
  86. # error "Error: if xxh_x86dispatch.c is compiled with AVX enabled, the resulting binary will crash on sse2-only cpus !! " \
  87. "If you nonetheless want to do that, please enable the XXH_X86DISPATCH_ALLOW_AVX build variable"
  88. #endif
  89. /*!
  90. * @def XXH_DISPATCH_SCALAR
  91. * @brief Enables/dispatching the scalar code path.
  92. *
  93. * If this is defined to 0, SSE2 support is assumed. This reduces code size
  94. * when the scalar path is not needed.
  95. *
  96. * This is automatically defined to 0 when...
  97. * - SSE2 support is enabled in the compiler
  98. * - Targeting x86_64
  99. * - Targeting Android x86
  100. * - Targeting macOS
  101. */
  102. #ifndef XXH_DISPATCH_SCALAR
  103. # if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \
  104. || defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \
  105. || defined(__ANDROID__) || defined(__APPLE__) /* Android or macOS */
  106. # define XXH_DISPATCH_SCALAR 0 /* disable */
  107. # else
  108. # define XXH_DISPATCH_SCALAR 1
  109. # endif
  110. #endif
  111. /*!
  112. * @def XXH_DISPATCH_AVX2
  113. * @brief Enables/disables dispatching for AVX2.
  114. *
  115. * This is automatically detected if it is not defined.
  116. * - GCC 4.7 and later are known to support AVX2, but >4.9 is required for
  117. * to get the AVX2 intrinsics and typedefs without -mavx -mavx2.
  118. * - Visual Studio 2013 Update 2 and later are known to support AVX2.
  119. * - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is
  120. * not allowed to be included directly, it still appears in the builtin
  121. * include path and is detectable with `__has_include`.
  122. *
  123. * @see XXH_AVX2
  124. */
  125. #ifndef XXH_DISPATCH_AVX2
  126. # if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \
  127. || (defined(_MSC_VER) && _MSC_VER >= 1900) /* VS 2015+ */ \
  128. || (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501) /* VS 2013 Update 2 */ \
  129. || XXH_HAS_INCLUDE(<avx2intrin.h>) /* GCC/Clang internal header */
  130. # define XXH_DISPATCH_AVX2 1 /* enable dispatch towards AVX2 */
  131. # else
  132. # define XXH_DISPATCH_AVX2 0
  133. # endif
  134. #endif /* XXH_DISPATCH_AVX2 */
  135. /*!
  136. * @def XXH_DISPATCH_AVX512
  137. * @brief Enables/disables dispatching for AVX512.
  138. *
  139. * Automatically detected if one of the following conditions is met:
  140. * - GCC 4.9 and later are known to support AVX512.
  141. * - Visual Studio 2017 and later are known to support AVX2.
  142. * - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this
  143. * is not allowed to be included directly, it still appears in the builtin
  144. * include path and is detectable with `__has_include`.
  145. *
  146. * @see XXH_AVX512
  147. */
  148. #ifndef XXH_DISPATCH_AVX512
  149. # if (defined(__GNUC__) \
  150. && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \
  151. || (defined(_MSC_VER) && _MSC_VER >= 1910) /* VS 2017+ */ \
  152. || XXH_HAS_INCLUDE(<avx512fintrin.h>) /* GCC/Clang internal header */
  153. # define XXH_DISPATCH_AVX512 1 /* enable dispatch towards AVX512 */
  154. # else
  155. # define XXH_DISPATCH_AVX512 0
  156. # endif
  157. #endif /* XXH_DISPATCH_AVX512 */
  158. /*!
  159. * @def XXH_TARGET_SSE2
  160. * @brief Allows a function to be compiled with SSE2 intrinsics.
  161. *
  162. * Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used
  163. * even with `-mno-sse2`.
  164. *
  165. * @def XXH_TARGET_AVX2
  166. * @brief Like @ref XXH_TARGET_SSE2, but for AVX2.
  167. *
  168. * @def XXH_TARGET_AVX512
  169. * @brief Like @ref XXH_TARGET_SSE2, but for AVX512.
  170. *
  171. */
  172. #if defined(__GNUC__)
  173. # include <emmintrin.h> /* SSE2 */
  174. # if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
  175. # include <immintrin.h> /* AVX2, AVX512F */
  176. # endif
  177. # define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
  178. # define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
  179. # define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
  180. #elif defined(__clang__) && defined(_MSC_VER) /* clang-cl.exe */
  181. # include <emmintrin.h> /* SSE2 */
  182. # if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
  183. # include <immintrin.h> /* AVX2, AVX512F */
  184. # include <smmintrin.h>
  185. # include <avxintrin.h>
  186. # include <avx2intrin.h>
  187. # include <avx512fintrin.h>
  188. # endif
  189. # define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
  190. # define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
  191. # define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
  192. #elif defined(_MSC_VER)
  193. # include <intrin.h>
  194. # define XXH_TARGET_SSE2
  195. # define XXH_TARGET_AVX2
  196. # define XXH_TARGET_AVX512
  197. #else
  198. # error "Dispatching is currently not supported for your compiler."
  199. #endif
  200. /*! @cond Doxygen ignores this part */
  201. #ifdef XXH_DISPATCH_DEBUG
  202. /* debug logging */
  203. # include <stdio.h>
  204. # define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); }
  205. #else
  206. # define XXH_debugPrint(str) ((void)0)
  207. # undef NDEBUG /* avoid redefinition */
  208. # define NDEBUG
  209. #endif
  210. /*! @endcond */
  211. #include <assert.h>
  212. #ifndef XXH_DOXYGEN
  213. #define XXH_INLINE_ALL
  214. #define XXH_X86DISPATCH
  215. #include "xxhash.h"
  216. #endif
  217. /*! @cond Doxygen ignores this part */
  218. #ifndef XXH_HAS_ATTRIBUTE
  219. # ifdef __has_attribute
  220. # define XXH_HAS_ATTRIBUTE(...) __has_attribute(__VA_ARGS__)
  221. # else
  222. # define XXH_HAS_ATTRIBUTE(...) 0
  223. # endif
  224. #endif
  225. /*! @endcond */
  226. /*! @cond Doxygen ignores this part */
  227. #if XXH_HAS_ATTRIBUTE(constructor)
  228. # define XXH_CONSTRUCTOR __attribute__((constructor))
  229. # define XXH_DISPATCH_MAYBE_NULL 0
  230. #else
  231. # define XXH_CONSTRUCTOR
  232. # define XXH_DISPATCH_MAYBE_NULL 1
  233. #endif
  234. /*! @endcond */
  235. /*! @cond Doxygen ignores this part */
  236. /*
  237. * Support both AT&T and Intel dialects
  238. *
  239. * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
  240. * compiled with -masm=intel. Instead, it supports dialect switching with
  241. * curly braces: { AT&T syntax | Intel syntax }
  242. *
  243. * Clang's integrated assembler automatically converts AT&T syntax to Intel if
  244. * needed, making the dialect switching useless (it isn't even supported).
  245. *
  246. * Note: Comments are written in the inline assembly itself.
  247. */
  248. #ifdef __clang__
  249. # define XXH_I_ATT(intel, att) att "\n\t"
  250. #else
  251. # define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
  252. #endif
  253. /*! @endcond */
  254. /*!
  255. * @private
  256. * @brief Runs CPUID.
  257. *
  258. * @param eax , ecx The parameters to pass to CPUID, %eax and %ecx respectively.
  259. * @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }`
  260. */
  261. static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
  262. {
  263. #if defined(_MSC_VER)
  264. __cpuidex((int*)abcd, eax, ecx);
  265. #else
  266. xxh_u32 ebx, edx;
  267. # if defined(__i386__) && defined(__PIC__)
  268. __asm__(
  269. "# Call CPUID\n\t"
  270. "#\n\t"
  271. "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
  272. "# EBX, so we use EDI instead.\n\t"
  273. XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi")
  274. XXH_I_ATT("cpuid", "cpuid" )
  275. XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi")
  276. : "=D" (ebx),
  277. # else
  278. __asm__(
  279. "# Call CPUID\n\t"
  280. XXH_I_ATT("cpuid", "cpuid")
  281. : "=b" (ebx),
  282. # endif
  283. "+a" (eax), "+c" (ecx), "=d" (edx));
  284. abcd[0] = eax;
  285. abcd[1] = ebx;
  286. abcd[2] = ecx;
  287. abcd[3] = edx;
  288. #endif
  289. }
  290. /*
  291. * Modified version of Intel's guide
  292. * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
  293. */
  294. #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
  295. /*!
  296. * @private
  297. * @brief Runs `XGETBV`.
  298. *
  299. * While the CPU may support AVX2, the operating system might not properly save
  300. * the full YMM/ZMM registers.
  301. *
  302. * xgetbv is used for detecting this: Any compliant operating system will define
  303. * a set of flags in the xcr0 register indicating how it saves the AVX registers.
  304. *
  305. * You can manually disable this flag on Windows by running, as admin:
  306. *
  307. * bcdedit.exe /set xsavedisable 1
  308. *
  309. * and rebooting. Run the same command with 0 to re-enable it.
  310. */
  311. static xxh_u64 XXH_xgetbv(void)
  312. {
  313. #if defined(_MSC_VER)
  314. return _xgetbv(0); /* min VS2010 SP1 compiler is required */
  315. #else
  316. xxh_u32 xcr0_lo, xcr0_hi;
  317. __asm__(
  318. "# Call XGETBV\n\t"
  319. "#\n\t"
  320. "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
  321. "# the XGETBV opcode, so we encode it by hand instead.\n\t"
  322. "# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
  323. ".byte 0x0f, 0x01, 0xd0\n\t"
  324. : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
  325. return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
  326. #endif
  327. }
  328. #endif
  329. /*! @cond Doxygen ignores this part */
  330. #define XXH_SSE2_CPUID_MASK (1 << 26)
  331. #define XXH_OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27))
  332. #define XXH_AVX2_CPUID_MASK (1 << 5)
  333. #define XXH_AVX2_XGETBV_MASK ((1 << 2) | (1 << 1))
  334. #define XXH_AVX512F_CPUID_MASK (1 << 16)
  335. #define XXH_AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1))
  336. /*! @endcond */
  337. /*!
  338. * @private
  339. * @brief Returns the best XXH3 implementation.
  340. *
  341. * Runs various CPUID/XGETBV tests to try and determine the best implementation.
  342. *
  343. * @return The best @ref XXH_VECTOR implementation.
  344. * @see XXH_VECTOR_TYPES
  345. */
  346. static int XXH_featureTest(void)
  347. {
  348. xxh_u32 abcd[4];
  349. xxh_u32 max_leaves;
  350. int best = XXH_SCALAR;
  351. #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
  352. xxh_u64 xgetbv_val;
  353. #endif
  354. #if defined(__GNUC__) && defined(__i386__)
  355. xxh_u32 cpuid_supported;
  356. __asm__(
  357. "# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
  358. "# is supported in the EFLAGS on i386.\n\t"
  359. "# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
  360. "# The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
  361. "# for the CPUID instruction. If a software procedure can set and\n\t"
  362. "# clear this flag, the processor executing the procedure supports\n\t"
  363. "# the CPUID instruction.\n\t"
  364. "# <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
  365. "#\n\t"
  366. "# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"
  367. "# Save EFLAGS\n\t"
  368. XXH_I_ATT("pushfd", "pushfl" )
  369. "# Store EFLAGS\n\t"
  370. XXH_I_ATT("pushfd", "pushfl" )
  371. "# Invert the ID bit in stored EFLAGS\n\t"
  372. XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)")
  373. "# Load stored EFLAGS (with ID bit inverted)\n\t"
  374. XXH_I_ATT("popfd", "popfl" )
  375. "# Store EFLAGS again (ID bit may or not be inverted)\n\t"
  376. XXH_I_ATT("pushfd", "pushfl" )
  377. "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
  378. XXH_I_ATT("pop eax", "popl %%eax" )
  379. "# eax = whichever bits were changed\n\t"
  380. XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" )
  381. "# Restore original EFLAGS\n\t"
  382. XXH_I_ATT("popfd", "popfl" )
  383. "# eax = zero if ID bit can't be changed, else non-zero\n\t"
  384. XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" )
  385. : "=a" (cpuid_supported) :: "cc");
  386. if (XXH_unlikely(!cpuid_supported)) {
  387. XXH_debugPrint("CPUID support is not detected!");
  388. return best;
  389. }
  390. #endif
  391. /* Check how many CPUID pages we have */
  392. XXH_cpuid(0, 0, abcd);
  393. max_leaves = abcd[0];
  394. /* Shouldn't happen on hardware, but happens on some QEMU configs. */
  395. if (XXH_unlikely(max_leaves == 0)) {
  396. XXH_debugPrint("Max CPUID leaves == 0!");
  397. return best;
  398. }
  399. /* Check for SSE2, OSXSAVE and xgetbv */
  400. XXH_cpuid(1, 0, abcd);
  401. /*
  402. * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
  403. */
  404. if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK))
  405. return best;
  406. XXH_debugPrint("SSE2 support detected.");
  407. best = XXH_SSE2;
  408. #if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
  409. /* Make sure we have enough leaves */
  410. if (XXH_unlikely(max_leaves < 7))
  411. return best;
  412. /* Test for OSXSAVE and XGETBV */
  413. if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK)
  414. return best;
  415. /* CPUID check for AVX features */
  416. XXH_cpuid(7, 0, abcd);
  417. xgetbv_val = XXH_xgetbv();
  418. #if XXH_DISPATCH_AVX2
  419. /* Validate that AVX2 is supported by the CPU */
  420. if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK)
  421. return best;
  422. /* Validate that the OS supports YMM registers */
  423. if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) {
  424. XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
  425. return best;
  426. }
  427. /* AVX2 supported */
  428. XXH_debugPrint("AVX2 support detected.");
  429. best = XXH_AVX2;
  430. #endif
  431. #if XXH_DISPATCH_AVX512
  432. /* Check if AVX512F is supported by the CPU */
  433. if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) {
  434. XXH_debugPrint("AVX512F not supported by CPU");
  435. return best;
  436. }
  437. /* Validate that the OS supports ZMM registers */
  438. if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) {
  439. XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
  440. return best;
  441. }
  442. /* AVX512F supported */
  443. XXH_debugPrint("AVX512F support detected.");
  444. best = XXH_AVX512;
  445. #endif
  446. #endif
  447. return best;
  448. }
  449. /* === Vector implementations === */
  450. /*! @cond PRIVATE */
  451. /*!
  452. * @private
  453. * @brief Defines the various dispatch functions.
  454. *
  455. * TODO: Consolidate?
  456. *
  457. * @param suffix The suffix for the functions, e.g. sse2 or scalar
  458. * @param target XXH_TARGET_* or empty.
  459. */
  460. #define XXH_DEFINE_DISPATCH_FUNCS(suffix, target) \
  461. \
  462. /* === XXH3, default variants === */ \
  463. \
  464. XXH_NO_INLINE target XXH64_hash_t \
  465. XXHL64_default_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
  466. size_t len) \
  467. { \
  468. return XXH3_hashLong_64b_internal( \
  469. input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \
  470. XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \
  471. ); \
  472. } \
  473. \
  474. /* === XXH3, Seeded variants === */ \
  475. \
  476. XXH_NO_INLINE target XXH64_hash_t \
  477. XXHL64_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len, \
  478. XXH64_hash_t seed) \
  479. { \
  480. return XXH3_hashLong_64b_withSeed_internal( \
  481. input, len, seed, XXH3_accumulate_##suffix, \
  482. XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix \
  483. ); \
  484. } \
  485. \
  486. /* === XXH3, Secret variants === */ \
  487. \
  488. XXH_NO_INLINE target XXH64_hash_t \
  489. XXHL64_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
  490. size_t len, XXH_NOESCAPE const void* secret, \
  491. size_t secretLen) \
  492. { \
  493. return XXH3_hashLong_64b_internal( \
  494. input, len, secret, secretLen, \
  495. XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \
  496. ); \
  497. } \
  498. \
  499. /* === XXH3 update variants === */ \
  500. \
  501. XXH_NO_INLINE target XXH_errorcode \
  502. XXH3_update_##suffix(XXH_NOESCAPE XXH3_state_t* state, \
  503. XXH_NOESCAPE const void* input, size_t len) \
  504. { \
  505. return XXH3_update(state, (const xxh_u8*)input, len, \
  506. XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix); \
  507. } \
  508. \
  509. /* === XXH128 default variants === */ \
  510. \
  511. XXH_NO_INLINE target XXH128_hash_t \
  512. XXHL128_default_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
  513. size_t len) \
  514. { \
  515. return XXH3_hashLong_128b_internal( \
  516. input, len, XXH3_kSecret, sizeof(XXH3_kSecret), \
  517. XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix \
  518. ); \
  519. } \
  520. \
  521. /* === XXH128 Secret variants === */ \
  522. \
  523. XXH_NO_INLINE target XXH128_hash_t \
  524. XXHL128_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, \
  525. size_t len, \
  526. XXH_NOESCAPE const void* XXH_RESTRICT secret, \
  527. size_t secretLen) \
  528. { \
  529. return XXH3_hashLong_128b_internal( \
  530. input, len, (const xxh_u8*)secret, secretLen, \
  531. XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix); \
  532. } \
  533. \
  534. /* === XXH128 Seeded variants === */ \
  535. \
  536. XXH_NO_INLINE target XXH128_hash_t \
  537. XXHL128_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len,\
  538. XXH64_hash_t seed) \
  539. { \
  540. return XXH3_hashLong_128b_withSeed_internal(input, len, seed, \
  541. XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix, \
  542. XXH3_initCustomSecret_##suffix); \
  543. }
  544. /*! @endcond */
  545. /* End XXH_DEFINE_DISPATCH_FUNCS */
  546. /*! @cond Doxygen ignores this part */
  547. #if XXH_DISPATCH_SCALAR
  548. XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */)
  549. #endif
  550. XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2)
  551. #if XXH_DISPATCH_AVX2
  552. XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2)
  553. #endif
  554. #if XXH_DISPATCH_AVX512
  555. XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512)
  556. #endif
  557. #undef XXH_DEFINE_DISPATCH_FUNCS
  558. /*! @endcond */
  559. /* ==== Dispatchers ==== */
  560. /*! @cond Doxygen ignores this part */
  561. typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
  562. typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t);
  563. typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
  564. typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH_NOESCAPE XXH3_state_t*, XXH_NOESCAPE const void*, size_t);
  565. typedef struct {
  566. XXH3_dispatchx86_hashLong64_default hashLong64_default;
  567. XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed;
  568. XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
  569. XXH3_dispatchx86_update update;
  570. } XXH_dispatchFunctions_s;
  571. #define XXH_NB_DISPATCHES 4
  572. /*! @endcond */
  573. /*!
  574. * @private
  575. * @brief Table of dispatchers for @ref XXH3_64bits().
  576. *
  577. * @pre The indices must match @ref XXH_VECTOR_TYPE.
  578. */
  579. static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = {
  580. #if XXH_DISPATCH_SCALAR
  581. /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar },
  582. #else
  583. /* Scalar */ { NULL, NULL, NULL, NULL },
  584. #endif
  585. /* SSE2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 },
  586. #if XXH_DISPATCH_AVX2
  587. /* AVX2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 },
  588. #else
  589. /* AVX2 */ { NULL, NULL, NULL, NULL },
  590. #endif
  591. #if XXH_DISPATCH_AVX512
  592. /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 }
  593. #else
  594. /* AVX512 */ { NULL, NULL, NULL, NULL }
  595. #endif
  596. };
  597. /*!
  598. * @private
  599. * @brief The selected dispatch table for @ref XXH3_64bits().
  600. */
  601. static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL };
  602. /*! @cond Doxygen ignores this part */
  603. typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
  604. typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t);
  605. typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
  606. typedef struct {
  607. XXH3_dispatchx86_hashLong128_default hashLong128_default;
  608. XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed;
  609. XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
  610. XXH3_dispatchx86_update update;
  611. } XXH_dispatch128Functions_s;
  612. /*! @endcond */
  613. /*!
  614. * @private
  615. * @brief Table of dispatchers for @ref XXH3_128bits().
  616. *
  617. * @pre The indices must match @ref XXH_VECTOR_TYPE.
  618. */
  619. static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = {
  620. #if XXH_DISPATCH_SCALAR
  621. /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar },
  622. #else
  623. /* Scalar */ { NULL, NULL, NULL, NULL },
  624. #endif
  625. /* SSE2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 },
  626. #if XXH_DISPATCH_AVX2
  627. /* AVX2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 },
  628. #else
  629. /* AVX2 */ { NULL, NULL, NULL, NULL },
  630. #endif
  631. #if XXH_DISPATCH_AVX512
  632. /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 }
  633. #else
  634. /* AVX512 */ { NULL, NULL, NULL, NULL }
  635. #endif
  636. };
  637. /*!
  638. * @private
  639. * @brief The selected dispatch table for @ref XXH3_64bits().
  640. */
  641. static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL };
  642. /*!
  643. * @private
  644. * @brief Runs a CPUID check and sets the correct dispatch tables.
  645. */
  646. static XXH_CONSTRUCTOR void XXH_setDispatch(void)
  647. {
  648. int vecID = XXH_featureTest();
  649. XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1);
  650. assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512);
  651. #if !XXH_DISPATCH_SCALAR
  652. assert(vecID != XXH_SCALAR);
  653. #endif
  654. #if !XXH_DISPATCH_AVX512
  655. assert(vecID != XXH_AVX512);
  656. #endif
  657. #if !XXH_DISPATCH_AVX2
  658. assert(vecID != XXH_AVX2);
  659. #endif
  660. XXH_g_dispatch = XXH_kDispatch[vecID];
  661. XXH_g_dispatch128 = XXH_kDispatch128[vecID];
  662. }
  663. /* ==== XXH3 public functions ==== */
  664. /*! @cond Doxygen ignores this part */
  665. static XXH64_hash_t
  666. XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len,
  667. XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
  668. {
  669. (void)seed64; (void)secret; (void)secretLen;
  670. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_default == NULL)
  671. XXH_setDispatch();
  672. return XXH_g_dispatch.hashLong64_default(input, len);
  673. }
  674. XXH64_hash_t XXH3_64bits_dispatch(XXH_NOESCAPE const void* input, size_t len)
  675. {
  676. return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
  677. }
  678. static XXH64_hash_t
  679. XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len,
  680. XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
  681. {
  682. (void)secret; (void)secretLen;
  683. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_seed == NULL)
  684. XXH_setDispatch();
  685. return XXH_g_dispatch.hashLong64_seed(input, len, seed64);
  686. }
  687. XXH64_hash_t XXH3_64bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
  688. {
  689. return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection);
  690. }
  691. static XXH64_hash_t
  692. XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len,
  693. XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
  694. {
  695. (void)seed64;
  696. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.hashLong64_secret == NULL)
  697. XXH_setDispatch();
  698. return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen);
  699. }
  700. XXH64_hash_t XXH3_64bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen)
  701. {
  702. return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
  703. }
  704. XXH_errorcode
  705. XXH3_64bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
  706. {
  707. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch.update == NULL)
  708. XXH_setDispatch();
  709. return XXH_g_dispatch.update(state, (const xxh_u8*)input, len);
  710. }
  711. /*! @endcond */
  712. /* ==== XXH128 public functions ==== */
  713. /*! @cond Doxygen ignores this part */
  714. static XXH128_hash_t
  715. XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len,
  716. XXH64_hash_t seed64, const void* secret, size_t secretLen)
  717. {
  718. (void)seed64; (void)secret; (void)secretLen;
  719. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_default == NULL)
  720. XXH_setDispatch();
  721. return XXH_g_dispatch128.hashLong128_default(input, len);
  722. }
  723. XXH128_hash_t XXH3_128bits_dispatch(XXH_NOESCAPE const void* input, size_t len)
  724. {
  725. return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
  726. }
  727. static XXH128_hash_t
  728. XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len,
  729. XXH64_hash_t seed64, const void* secret, size_t secretLen)
  730. {
  731. (void)secret; (void)secretLen;
  732. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_seed == NULL)
  733. XXH_setDispatch();
  734. return XXH_g_dispatch128.hashLong128_seed(input, len, seed64);
  735. }
  736. XXH128_hash_t XXH3_128bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
  737. {
  738. return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection);
  739. }
  740. static XXH128_hash_t
  741. XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len,
  742. XXH64_hash_t seed64, const void* secret, size_t secretLen)
  743. {
  744. (void)seed64;
  745. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.hashLong128_secret == NULL)
  746. XXH_setDispatch();
  747. return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
  748. }
  749. XXH128_hash_t XXH3_128bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen)
  750. {
  751. return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
  752. }
  753. XXH_errorcode
  754. XXH3_128bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
  755. {
  756. if (XXH_DISPATCH_MAYBE_NULL && XXH_g_dispatch128.update == NULL)
  757. XXH_setDispatch();
  758. return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len);
  759. }
  760. /*! @endcond */
  761. #if defined (__cplusplus)
  762. }
  763. #endif
  764. /*! @} */