diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h index 153001e7b62e30..5ec792e9b9a29c 100644 --- a/libcxx/include/__atomic/atomic_sync.h +++ b/libcxx/include/__atomic/atomic_sync.h @@ -108,15 +108,13 @@ struct __atomic_wait_backoff_impl { _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const { - if (__elapsed > chrono::microseconds(64)) { + if (__elapsed > chrono::microseconds(4)) { auto __contention_address = __waitable_traits::__atomic_contention_address(__a_); __cxx_contention_t __monitor_val; if (__update_monitor_val_and_poll(__contention_address, __monitor_val)) return true; std::__libcpp_atomic_wait(__contention_address, __monitor_val); - } else if (__elapsed > chrono::microseconds(4)) - __libcpp_thread_yield(); - else { + } else { } // poll return false; } diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp index d19f5fbed8ad60..4e934a94a93541 100644 --- a/libcxx/test/benchmarks/atomic_wait.bench.cpp +++ b/libcxx/test/benchmarks/atomic_wait.bench.cpp @@ -12,21 +12,86 @@ #include #include #include +#include +#include #include +#include +#include #include "benchmark/benchmark.h" #include "make_test_thread.h" using namespace std::chrono_literals; -void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) { - std::atomic a; - auto thread_func = [&](std::stop_token st) { +struct HighPrioTask { + sched_param param; + pthread_attr_t attr_t; + pthread_t thread; + std::atomic_bool stopped{false}; + + HighPrioTask(const HighPrioTask&) = delete; + + HighPrioTask() { + pthread_attr_init(&attr_t); + pthread_attr_setschedpolicy(&attr_t, SCHED_FIFO); + param.sched_priority = sched_get_priority_max(SCHED_FIFO); + pthread_attr_setschedparam(&attr_t, ¶m); + pthread_attr_setinheritsched(&attr_t, PTHREAD_EXPLICIT_SCHED); + + auto thread_fun = [](void* arg) -> void* { + auto* stop = reinterpret_cast(arg); + while (!stop->load(std::memory_order_relaxed)) { + // spin + } + return nullptr; + }; + + if (pthread_create(&thread, &attr_t, thread_fun, &stopped) != 0) { + throw std::runtime_error("failed to create thread"); + } + } + + ~HighPrioTask() { + stopped = true; + pthread_attr_destroy(&attr_t); + pthread_join(thread, nullptr); + } +}; + +template +struct NumHighPrioTasks { + static constexpr auto value = N; +}; + +struct KeepNotifying { + template + static void notify(Atomic& a, std::stop_token st) { while (!st.stop_requested()) { a.fetch_add(1, std::memory_order_relaxed); a.notify_all(); } - }; + } +}; + +template +struct NotifyEveryNus { + template + static void notify(Atomic& a, std::stop_token st) { + while (!st.stop_requested()) { + auto start = std::chrono::system_clock::now(); + a.fetch_add(1, std::memory_order_relaxed); + a.notify_all(); + while (std::chrono::system_clock::now() - start < std::chrono::microseconds{N}) { + } + } + } +}; + +template +void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) { + [[maybe_unused]] std::array tasks{}; + std::atomic a; + auto thread_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); }; std::uint64_t total_loop_test_param = state.range(0); @@ -39,19 +104,51 @@ void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) { } } } -BENCHMARK(BM_atomic_wait_one_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); -void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) { +BENCHMARK(BM_1_atomic_1_waiter_1_notifier>) + ->RangeMultiplier(2) + ->Range(1 << 18, 1 << 20); +BENCHMARK(BM_1_atomic_1_waiter_1_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 12, 1 << 14); +BENCHMARK(BM_1_atomic_1_waiter_1_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 12, 1 << 14); + +BENCHMARK(BM_1_atomic_1_waiter_1_notifier>) + ->RangeMultiplier(2) + ->Range(1 << 18, 1 << 20); +BENCHMARK(BM_1_atomic_1_waiter_1_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 12, 1 << 14); +BENCHMARK(BM_1_atomic_1_waiter_1_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 12, 1 << 14); + +BENCHMARK(BM_1_atomic_1_waiter_1_notifier>) + ->RangeMultiplier(2) + ->Range(1 << 4, 1 << 6); +BENCHMARK(BM_1_atomic_1_waiter_1_notifier, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 3, 1 << 5); +BENCHMARK(BM_1_atomic_1_waiter_1_notifier, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 3, 1 << 5); + +template +struct NumWaitingThreads { + static constexpr auto value = N; +}; + +template +void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) { + [[maybe_unused]] std::array tasks{}; + std::atomic a; - auto notify_func = [&](std::stop_token st) { - while (!st.stop_requested()) { - a.fetch_add(1, std::memory_order_relaxed); - a.notify_all(); - } - }; + auto notify_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); }; std::uint64_t total_loop_test_param = state.range(0); - constexpr auto num_waiting_threads = 15; + constexpr auto num_waiting_threads = NumWaitingThreads::value; std::vector wait_threads; wait_threads.reserve(num_waiting_threads); @@ -93,17 +190,111 @@ void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) { t.join(); } } -BENCHMARK(BM_atomic_wait_multi_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 20); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 14, 1 << 16); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 12, 1 << 14); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 12); + +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<3>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 12); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<7>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<15>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); + +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<3>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<7>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<15>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 4, 1 << 6); + +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 4, 1 << 6); -void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state) { +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<3>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<7>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<15>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 4, 1 << 6); + +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<3>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<7>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<15>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 4, 1 << 6); + +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 4, 1 << 6); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 3, 1 << 5); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 2, 1 << 4); + +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<3>, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 3, 1 << 5); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<7>, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 2, 1 << 4); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<15>, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 1, 1 << 3); + +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<3>, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 3, 1 << 5); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<7>, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 2, 1 << 4); +BENCHMARK(BM_1_atomic_multi_waiter_1_notifier, NumWaitingThreads<15>, NumHighPrioTasks<7>>) + ->RangeMultiplier(2) + ->Range(1 << 1, 1 << 3); + +template +struct NumberOfAtomics { + static constexpr auto value = N; +}; + +template +void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) { + [[maybe_unused]] std::array tasks{}; const std::uint64_t total_loop_test_param = state.range(0); - constexpr std::uint64_t num_atomics = 7; + constexpr std::uint64_t num_atomics = NumberOfAtomics::value; std::vector> atomics(num_atomics); auto notify_func = [&](std::stop_token st, size_t idx) { while (!st.stop_requested()) { - atomics[idx].fetch_add(1, std::memory_order_relaxed); - atomics[idx].notify_all(); + NotifyPolicy::notify(atomics[idx], st); } }; @@ -154,6 +345,83 @@ void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state) t.join(); } } -BENCHMARK(BM_atomic_wait_multi_thread_wait_different_atomics)->RangeMultiplier(2)->Range(1 << 10, 1 << 20); + +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 12, 1 << 14); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 12); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 12); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); + +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<2>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 12); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<3>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<5>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<7>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); + +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<2>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<3>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 8, 1 << 10); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<5>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 7, 1 << 9); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<7>, NumHighPrioTasks<0>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); + +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 7, 1 << 9); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 7, 1 << 9); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 4, 1 << 6); + +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<2>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 7, 1 << 9); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<3>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 7, 1 << 9); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<5>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 5, 1 << 7); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<7>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 3, 1 << 5); + +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<2>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<3>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 6, 1 << 8); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<5>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 5, 1 << 7); +BENCHMARK(BM_N_atomics_N_waiter_N_notifier, NumberOfAtomics<7>, NumHighPrioTasks<4>>) + ->RangeMultiplier(2) + ->Range(1 << 3, 1 << 5); BENCHMARK_MAIN();