bench.mm

/*
 * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */

#include <Foundation/Foundation.h>
#include <libkern/OSAtomic.h>
#ifdef __ANDROID__
#include <linux/sysctl.h>
#else
#if !defined(__linux__)
#include <sys/sysctl.h>
#endif
#endif /* __ANDROID__ */
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h>
#endif
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <math.h>
#ifdef __BLOCKS__
#include <Block.h>
#endif
#include <dispatch/dispatch.h>
#include <dispatch/private.h>

//#define BENCH_SLOW 1
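// BENCH_SLOW gates the benchmarks that are orders of magnitude more
// expensive per iteration (thread create/join, Mach semaphore calls,
// ObjC -description, usleep); most of those loops run cnt2 = cnt/100
// iterations and report through print_result2().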
extern "C" {
__private_extern__ void func(void);
#ifdef __BLOCKS__
__private_extern__ void (^block)(void);
#endif
static void backflip(void *ctxt);
static void backflip_done(void);
}
@interface BasicObject : NSObject
{
}
- (void) method;
@end

@implementation BasicObject
- (void) method
{
}
@end

class BasicClass {
public:
    virtual void virtfunc(void) {
    };
};
static void *
force_a_thread(void *arg)
{
    pause();
    abort();
    return arg;
}
static volatile int32_t global;
static volatile int64_t w_global;

#if TARGET_OS_EMBEDDED
static const size_t cnt = 5000000;
#else
static const size_t cnt = 200000000;
#endif
static const size_t cnt2 = cnt/100;

static uint64_t bfs;
static long double loop_cost;
static long double cycles_per_nanosecond;
static mach_timebase_info_data_t tbi;
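// Measurement scheme: each benchmark records mach_absolute_time() before a
// loop of cnt iterations. print_result() converts the elapsed ticks to
// nanoseconds via the Mach timebase, divides by the iteration count,
// subtracts the calibrated empty-loop cost, and scales by cycles per
// nanosecond so results print in CPU cycles per operation.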
static void __attribute__((noinline))
print_result(uint64_t s, const char *str)
{
    uint64_t d, e = mach_absolute_time();
    long double dd;

    d = e - s;

    if (tbi.numer != tbi.denom) {
        d *= tbi.numer;
        d /= tbi.denom;
    }

    dd = (__typeof__(dd))d / (__typeof__(dd))cnt;

    dd -= loop_cost;

    if (loop_cost == 0.0) {
        loop_cost = dd;
    }

    dd *= cycles_per_nanosecond;

    dd = roundl(dd * 200.0)/200.0;

    printf("%-45s%15.3Lf cycles\n", str, dd);
}
#if BENCH_SLOW || !TARGET_OS_EMBEDDED
static void __attribute__((noinline))
print_result2(uint64_t s, const char *str)
{
    uint64_t d, e = mach_absolute_time();
    long double dd;

    d = e - s;

    if (tbi.numer != tbi.denom) {
        d *= tbi.numer;
        d /= tbi.denom;
    }

    dd = (__typeof__(dd))d / (__typeof__(dd))cnt2;

    dd -= loop_cost;
    dd *= cycles_per_nanosecond;

    printf("%-45s%15.3Lf cycles\n", str, dd);
}
#endif
#if defined(__i386__) || defined(__x86_64__)
static inline uint64_t
rdtsc(void)
{
    uint32_t lo, hi;

    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));

    return (uint64_t)hi << 32 | lo;
}
#endif
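// A minimal intrusive LIFO free list: frees push a 32-byte block onto the
// head, allocations pop it back off. After warm-up this never calls
// malloc(), so it benchmarks the best case for a fixed-size,
// single-threaded allocator.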
static struct fml {
    struct fml *fml_next;
} *fixed_malloc_lifo_head;

struct fml *fixed_malloc_lifo(void);// __attribute__((noinline));
void fixed_free_lifo(struct fml *fml);// __attribute__((noinline));

struct fml *
fixed_malloc_lifo(void)
{
    struct fml *fml_r = fixed_malloc_lifo_head;

    if (fml_r) {
        fixed_malloc_lifo_head = fml_r->fml_next;
        return fml_r;
    } else {
        return (struct fml *)malloc(32);
    }
}

void
fixed_free_lifo(struct fml *fml)
{
    fml->fml_next = fixed_malloc_lifo_head;
    fixed_malloc_lifo_head = fml;
}
int
main(void)
{
    pthread_mutex_t plock = PTHREAD_MUTEX_INITIALIZER;
    OSSpinLock slock = OS_SPINLOCK_INIT;
    BasicObject *bo;
    BasicClass *bc;
    pthread_t pthr_pause;
    dispatch_queue_t q, mq;
    kern_return_t kr;
#if BENCH_SLOW
    semaphore_t sem;
#endif
    uint64_t freq;
    uint64_t s;
    size_t freq_len = sizeof(freq);
    size_t bf_cnt = cnt;
    unsigned i;
    int r;

    printf("\n====================================================================\n");
    printf("[TEST] dispatch benchmark\n");
    printf("[PID] %d\n", getpid());
    printf("====================================================================\n\n");

    r = sysctlbyname("hw.cpufrequency", &freq, &freq_len, NULL, 0);
    assert(r != -1);
    assert(freq_len == sizeof(freq));

    cycles_per_nanosecond = (long double)freq / (long double)NSEC_PER_SEC;

#if BENCH_SLOW
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    assert(pool);
#endif

    /* Malloc has different logic for threaded apps. */
    r = pthread_create(&pthr_pause, NULL, force_a_thread, NULL);
    assert(r == 0);

    kr = mach_timebase_info(&tbi);
    assert(kr == 0);
#if defined(__i386__) || defined(__x86_64__)
    assert(tbi.numer == tbi.denom); /* This will fail on PowerPC. */
#endif

    bo = [[BasicObject alloc] init];
    assert(bo);

    bc = new BasicClass();
    assert(bc);

    q = dispatch_queue_create("com.apple.bench-dispatch", NULL);
    assert(q);

    mq = dispatch_get_main_queue();
    assert(mq);

    printf("%-45s%15Lf\n\n", "Cycles per nanosecond:", cycles_per_nanosecond);
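    // Calibration: the first print_result() call records this empty loop's
    // per-iteration time as loop_cost, which every subsequent result then
    // has subtracted from it.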
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("");
    }
    print_result(s, "Empty loop:");

    printf("\nLoop cost subtracted from the following:\n\n");
#if TARGET_OS_EMBEDDED
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        mach_absolute_time();
    }
    print_result(s, "mach_absolute_time():");
#else
    s = mach_absolute_time();
    for (i = cnt2; i; i--) {
        mach_absolute_time();
    }
    print_result2(s, "mach_absolute_time():");
#endif

#if defined(__i386__) || defined(__x86_64__)
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        rdtsc();
    }
    print_result(s, "rdtsc():");
#endif
#if BENCH_SLOW
    s = mach_absolute_time();
    for (i = cnt2; i; i--) {
        pthread_t pthr;
        void *pr;

        r = pthread_create(&pthr, NULL, (void *(*)(void *))func, NULL);
        assert(r == 0);
        r = pthread_join(pthr, &pr);
        assert(r == 0);
    }
    print_result2(s, "pthread create+join:");

    s = mach_absolute_time();
    for (i = cnt2; i; i--) {
        kr = semaphore_create(mach_task_self(), &sem, SYNC_POLICY_FIFO, 0);
        assert(kr == 0);
        kr = semaphore_destroy(mach_task_self(), sem);
        assert(kr == 0);
    }
    print_result2(s, "Mach semaphore create/destroy:");

    kr = semaphore_create(mach_task_self(), &sem, SYNC_POLICY_FIFO, 0);
    assert(kr == 0);
    s = mach_absolute_time();
    for (i = cnt2; i; i--) {
        kr = semaphore_signal(sem);
        assert(kr == 0);
    }
    print_result2(s, "Mach semaphore signal:");
    kr = semaphore_destroy(mach_task_self(), sem);
    assert(kr == 0);

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        free(malloc(32));
    }
    print_result(s, "free(malloc(32)):");

    s = mach_absolute_time();
    for (i = cnt / 2; i; i--) {
        void *m1 = malloc(32);
        void *m2 = malloc(32);
        free(m1);
        free(m2);
    }
    print_result(s, "Avoiding the MRU cache of free(malloc(32)):");
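    // Keeping two allocations live before freeing is intended (per the
    // label above) to defeat malloc's most-recently-used small-block cache,
    // exercising a slower path than back-to-back free(malloc(32)).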
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        fixed_free_lifo(fixed_malloc_lifo());
    }
    print_result(s, "per-thread/fixed free(malloc(32)):");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        assert(strtoull("18446744073709551615", NULL, 0) == ~0ull);
    }
    print_result(s, "strtoull(\"18446744073709551615\") == ~0ull:");
#endif
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        func();
    }
    print_result(s, "Empty function call:");

#ifdef __BLOCKS__
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        block();
    }
    print_result(s, "Empty block call:");
#endif

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        bc->virtfunc();
    }
    print_result(s, "Empty C++ virtual call:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        [bo method];
    }
    print_result(s, "Empty ObjC call:");

#if BENCH_SLOW
    s = mach_absolute_time();
    for (i = cnt2; i; i--) {
        [bo description];
    }
    print_result2(s, "\"description\" ObjC call:");

    [pool release];
    pool = NULL;
#endif
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("nop");
    }
    print_result(s, "raw 'nop':");

#if defined(__i386__) || defined(__x86_64__)
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("pause");
    }
    print_result(s, "raw 'pause':");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("mfence");
    }
    print_result(s, "Atomic mfence:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("lfence");
    }
    print_result(s, "Atomic lfence:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("sfence");
    }
    print_result(s, "Atomic sfence:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        uint64_t sidt_rval;
        __asm__ __volatile__ ("sidt %0" : "=m" (sidt_rval));
    }
    print_result(s, "'sidt' instruction:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        long prev;
        __asm__ __volatile__ ("cmpxchg %1,%2"
                : "=a" (prev) : "r" (0l), "m" (global), "0" (1l));
    }
    print_result(s, "'cmpxchg' without the 'lock' prefix:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        global = 0;
        __asm__ __volatile__ ("mfence" ::: "memory");
    }
    print_result(s, "Store + mfence:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        unsigned long _clbr;
#ifdef __LP64__
        __asm__ __volatile__ ("cpuid" : "=a" (_clbr)
                : "0" (0) : "rbx", "rcx", "rdx", "cc", "memory");
#else
#ifdef __llvm__
        __asm__ __volatile__ ("cpuid" : "=a" (_clbr) : "0" (0)
                : "ebx", "ecx", "edx", "cc", "memory");
#else // gcc does not allow inline i386 asm to clobber ebx
        __asm__ __volatile__ ("pushl %%ebx\n\tcpuid\n\tpopl %%ebx"
                : "=a" (_clbr) : "0" (0) : "ecx", "edx", "cc", "memory");
#endif
#endif
    }
    print_result(s, "'cpuid' instruction:");
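    // 'cpuid' is a fully serializing instruction, so this measures the cost
    // of draining the whole pipeline rather than just a memory fence.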
#elif defined(__arm__)
#include <arm/arch.h>

#if !defined(_ARM_ARCH_7) && defined(__thumb__)
#error "GCD requires instructions unavailable in ARMv6 Thumb1"
#endif

#ifdef _ARM_ARCH_7
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("yield");
    }
    print_result(s, "raw 'yield':");
#endif

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
#ifdef _ARM_ARCH_7
        __asm__ __volatile__ ("dmb ish" : : : "memory");
#else
        __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
#endif
    }
    print_result(s, "'dmb ish' instruction:");

#ifdef _ARM_ARCH_7
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("dmb ishst" : : : "memory");
    }
    print_result(s, "'dmb ishst' instruction:");
#endif

#ifdef _ARM_ARCH_7
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __asm__ __volatile__ ("str %[_r], [%[_p], %[_o]]" :
                : [_p] "p" (&global), [_o] "M" (0), [_r] "r" (0) : "memory");
        __asm__ __volatile__ ("dmb ishst" : : : "memory");
    }
    print_result(s, "'str + dmb ishst' instructions:");
#endif
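    // ldrex/strex are ARM's load-exclusive/store-exclusive (LL/SC) pair;
    // the loop retries until the store-exclusive reports success, which is
    // how atomic read-modify-write operations are built on this
    // architecture.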
#ifdef _ARM_ARCH_7
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        uintptr_t prev;
        uint32_t t;
        do {
            __asm__ __volatile__ ("ldrex %[_r], [%[_p], %[_o]]"
                    : [_r] "=&r" (prev)
                    : [_p] "p" (&global), [_o] "M" (0) : "memory");
            __asm__ __volatile__ ("strex %[_t], %[_r], [%[_p], %[_o]]"
                    : [_t] "=&r" (t)
                    : [_p] "p" (&global), [_o] "M" (0), [_r] "r" (0) : "memory");
        } while (t);
    }
    print_result(s, "'ldrex + strex' instructions:");
#endif

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
#ifdef _ARM_ARCH_7
        __asm__ __volatile__ ("dsb ish" : : : "memory");
#else
        __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory");
#endif
    }
    print_result(s, "'dsb ish' instruction:");

#if BENCH_SLOW
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        register long _swtch_pri __asm__("ip") = -59;
        __asm__ __volatile__ ("svc 0x80" : : "r" (_swtch_pri) : "r0", "memory");
    }
    print_result(s, "swtch_pri syscall:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        register long _r0 __asm__("r0") = 0, _r1 __asm__("r1") = 1,
                _r2 __asm__("r2") = 1;
        register long _thread_switch __asm__("ip") = -61;
        __asm__ __volatile__ ("svc 0x80" : "+r" (_r0)
                : "r" (_r1), "r" (_r2), "r" (_thread_switch) : "memory");
    }
    print_result(s, "thread_switch syscall:");
#endif
#endif // __arm__
#if BENCH_SLOW
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        pthread_yield_np();
    }
    print_result(s, "pthread_yield_np():");

    s = mach_absolute_time();
    for (i = cnt2; i; i--) {
        usleep(0);
    }
    print_result2(s, "usleep(0):");
#endif
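    // The __sync_* builtins below are the legacy GCC atomic intrinsics.
    // Most imply a full memory barrier; __sync_lock_test_and_set is
    // documented as an acquire barrier only.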
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __sync_lock_test_and_set(&global, 0);
    }
    print_result(s, "Atomic xchg:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __sync_val_compare_and_swap(&global, 1, 0);
    }
    print_result(s, "Atomic cmpxchg:");

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        __sync_fetch_and_add(&global, 1);
    }
    print_result(s, "Atomic increment:");
    {
        global = 0;
        volatile int32_t *g = &global;

        s = mach_absolute_time();
        for (i = cnt; i; i--) {
            uint32_t result;
            __sync_and_and_fetch(g, 1);
            result = *g;
            if (result) {
                abort();
            }
        }
        print_result(s, "Atomic and-and-fetch, reloading result:");
    }

    {
        global = 0;
        volatile int32_t *g = &global;

        s = mach_absolute_time();
        for (i = cnt; i; i--) {
            uint32_t result;
            result = __sync_and_and_fetch(g, 1);
            if (result) {
                abort();
            }
        }
        print_result(s, "Atomic and-and-fetch, using result:");
    }
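    // The two and-and-fetch variants differ only in whether the result is
    // taken from the builtin's return value or re-read from memory; the
    // delta shows the cost of the extra load after an atomic op.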
    global = 0;
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        OSAtomicIncrement32Barrier(&global);
    }
    print_result(s, "OSAtomicIncrement32Barrier:");

    global = 0;
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        OSAtomicIncrement32(&global);
    }
    print_result(s, "OSAtomicIncrement32:");

    w_global = 0;
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        OSAtomicIncrement64Barrier(&w_global);
    }
    print_result(s, "OSAtomicIncrement64Barrier:");

    w_global = 0;
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        OSAtomicIncrement64(&w_global);
    }
    print_result(s, "OSAtomicIncrement64:");
    global = 0;
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        while (!__sync_bool_compare_and_swap(&global, 0, 1)) {
            do {
#if defined(__i386__) || defined(__x86_64__)
                __asm__ __volatile__ ("pause");
#elif defined(__arm__) && defined _ARM_ARCH_7
                __asm__ __volatile__ ("yield");
#endif
            } while (global);
        }
        global = 0;
    }
    print_result(s, "Inlined spin lock/unlock:");
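    // Classic test-and-test-and-set: on CAS failure the inner loop spins on
    // a plain read (with a 'pause'/'yield' hint) until the lock looks free,
    // then retries the CAS. The lock is uncontended here, so each iteration
    // costs one CAS plus one store.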
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        OSSpinLockLock(&slock);
        OSSpinLockUnlock(&slock);
    }
    print_result(s, "OSSpinLock/Unlock:");
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        r = pthread_mutex_lock(&plock);
        assert(r == 0);
        r = pthread_mutex_unlock(&plock);
        assert(r == 0);
    }
    print_result(s, "pthread lock/unlock:");
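    // The remaining benchmarks measure libdispatch itself: synchronous
    // submission in block and function-pointer (_f) form, barrier variants,
    // a parallel dispatch_apply_f(), and finally async enqueue/dequeue via
    // the "backflip" chain driven from the main queue.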
#ifdef __BLOCKS__
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        dispatch_sync(q, ^{ });
    }
    print_result(s, "dispatch_sync:");
#endif

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        dispatch_sync_f(q, NULL, (void (*)(void *))func);
    }
    print_result(s, "dispatch_sync_f:");

#ifdef __BLOCKS__
    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        dispatch_barrier_sync(q, ^{ });
    }
    print_result(s, "dispatch_barrier_sync:");
#endif

    s = mach_absolute_time();
    for (i = cnt; i; i--) {
        dispatch_barrier_sync_f(q, NULL, (void (*)(void *))func);
    }
    print_result(s, "dispatch_barrier_sync_f:");

    s = mach_absolute_time();
    dispatch_apply_f(cnt,
            dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
            NULL, (void (*)(void *, size_t))func);
    s += loop_cost; // cancel out the implicit subtraction done by the next line
    print_result(s, "dispatch_apply_f():");

    // do a "double backflip" to hit the fast-path of the enqueue/dequeue logic
    bfs = mach_absolute_time();
    dispatch_async_f(dispatch_get_main_queue(), &bf_cnt, backflip);
    dispatch_async_f(dispatch_get_main_queue(), &bf_cnt, backflip);
    dispatch_main();
}
__attribute__((noinline))
void
backflip_done(void)
{
    print_result(bfs, "dispatch_async_f():");
    exit(EXIT_SUCCESS);
}
void
backflip(void *ctxt)
{
    size_t *bf_cnt = (size_t *)ctxt;
    if (--(*bf_cnt)) {
        return dispatch_async_f(dispatch_get_main_queue(), ctxt, backflip);
    }
    backflip_done();
}
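// Each backflip() decrements the shared counter and re-enqueues itself on
// the main queue; seeding two in flight keeps the queue non-empty so every
// dispatch_async_f() hits the enqueue/dequeue fast path. When the counter
// reaches zero, backflip_done() reports the per-enqueue cost measured since
// bfs was captured, then exits.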