From: Nick Bowler Date: Thu, 25 Jan 2024 04:19:16 +0000 (-0500) Subject: libcdecl: Use a different pthread_once workalike for Windows. X-Git-Tag: v1.3~23 X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/commitdiff_plain/bf336dc52970daa394d119b11284a1b5016a74c0 libcdecl: Use a different pthread_once workalike for Windows. Replace the windows-once implementation from gnulib with a different one adapted from the pthreads-win32 library. This version uses less static data and does not allocate any Windows resources in the normal (uncontended) case. --- diff --git a/bootstrap b/bootstrap index 02ca644..639e6c4 100755 --- a/bootstrap +++ b/bootstrap @@ -33,7 +33,7 @@ if test -x $GNULIB/gnulib-tool; then exec 3>lib/symfiles.tmp 4&3 ) || die "gnulib-tool failed" shared_modules=`LC_ALL=C sort -u <&4` diff --git a/lib/local/modules/dx-threadlib b/lib/local/modules/dx-threadlib index 9499e3b..e5ee6a1 100644 --- a/lib/local/modules/dx-threadlib +++ b/lib/local/modules/dx-threadlib @@ -7,7 +7,6 @@ Files: Depends-on: gnulib-local threadlib -windows-once [test x"$gl_threads_api" = x"windows"] configure.ac-early: dnl AC_REQUIRE has the effect of making the macro expansion unconditional; diff --git a/src/thread-w32.h b/src/thread-w32.h index fb3bdc4..181ebfc 100644 --- a/src/thread-w32.h +++ b/src/thread-w32.h @@ -8,6 +8,19 @@ * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * + * The init_once implementation is adapted from the Pthreads-win32 library + * implementation based on MCS (Mellor-Crummy Scott) locks, originally + * distributed with the following copyright and permission notice: + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -19,7 +32,6 @@ #define WIN32_LEAN_AND_MEAN #include -#include "windows-once.h" static DWORD tls_key; #define tls_key_valid (tls_key != TLS_OUT_OF_INDEXES) @@ -36,16 +48,113 @@ static BOOL tls_set(void *p) return TlsSetValue(tls_key, p); } -static void init_once_with_tls(void) +/* + * Synchronize with another thread's call to init_once_wait on the same + * HANDLE object, which must be initialized to null. If init_once_wait + * got there first, we receive a handle to the event object which can + * then be signaled. + */ +static void init_once_signal(HANDLE *ep) { - tls_key = TlsAlloc(); - init_once_cb(); + /* + * Note that INVALID_HANDLE_VALUE is distinct from a null pointer + * and also distinct from any handle returned by CreateEvent. + */ + HANDLE e = INVALID_HANDLE_VALUE; + + if ((e = InterlockedCompareExchangePointer(ep, e, NULL))) + SetEvent(e); } +/* + * Synchronize with another thread's call to init_once_signal on the same + * HANDLE object. + */ +static void init_once_wait(HANDLE *ep) +{ +#if _WIN64 + if (!InterlockedAdd64((LONG64 *)ep, 0)) // load with memory barrier +#else + if (!InterlockedAdd((LONG *)ep, 0)) +#endif + { + HANDLE e; + + e = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!InterlockedCompareExchangePointer(ep, e, NULL)) + WaitForSingleObject(e, INFINITE); + CloseHandle(e); + } +} + +/* + * This implementation using an MCS lock variation requires only a single + * pointer of shared global state initialized to null, and in the uncontended + * case does not require allocation of any Windows resources whatsoever. + * + * These locks are described in the paper: + * + * "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors" + * by John M. Mellor-Crummey and Michael L. Scott. + * ACM Transactions on Computer Systems Volume 9, Issue 1 (Feb. 1991). + * + * The basic idea is that each thread has a local state, which includes a + * pointer to the next waiting thread. The global state is a tail pointer to + * the last waiting thread. The running thread holds the lock and also the + * pointer to the first waiter. + * + * On acquire, atomically swap our fresh local state with the global tail + * pointer, becoming the new last waiter. We receive a pointer to the previous + * last waiter (or nothing, in the unlocked case). At this point it is safe + * for a new thread to come along and update the tail pointer again. If + * needed, we then update the last waiter to point to our thread, signal + * that this is completed, and then wait to be signaled. + * + * On release, if the tail pointer points to us there are no waiters, and this + * can be confirmed with an atomic compare and exchange to the done state, + * which is equivalent to the original state except that a subsequent acquirer + * will know that the initialization has been previously completed. + * + * If that didn't unlock the lock, we need to wait for the signal from the next + * thread (which may not have updated our next pointer yet), then signal the + * next thread to wake up. Eventually the queue will empty and the lock is + * left in the done state, at which point a simple atomic load can determine + * that nothing else needs to happen. + */ static int init_once(void) { - static glwthread_once_t ctrl = GLWTHREAD_ONCE_INIT; + struct once_state * const init_done = (void *)-1; + + struct once_state { + struct once_state *next; + HANDLE ready_event, next_event; + }; + + static void * volatile tail; + struct once_state local = {0}; + struct once_state *p; + + /* fast path for the normal (init completed) case. */ + if (tail == init_done) + return 1; + + if (!(p = InterlockedExchangePointer(&tail, &local))) { + /* we're number one! */ + tls_key = TlsAlloc(); + init_once_cb(); + } else if (p != init_done) { + /* contended, wait for predecessor */ + p->next = &local; + init_once_signal(&p->next_event); + init_once_wait(&local.ready_event); + } + + p = InterlockedCompareExchangePointer(&tail, init_done, &local); + if (p != &local) { + /* contended, wait for successor */ + init_once_wait(&local.next_event); + init_once_signal(&local.next->ready_event); + } - glwthread_once(&ctrl, init_once_with_tls); return 1; } diff --git a/t/errthread.c b/t/errthread.c index 445fc83..0f1fdf1 100644 --- a/t/errthread.c +++ b/t/errthread.c @@ -101,6 +101,11 @@ static void run_thread2(void) tap_bail_out("run_thread2 failed: %s", strerror(err)); } +static void check_init_once(void) +{ + tap_result(1, "init_once # SKIP test not implemented"); +} + #elif USE_ISOC_THREADS #define THREAD_API "isoc" #include @@ -121,6 +126,11 @@ static void run_thread2(void) tap_bail_out("run_thread2 failed"); } +static void check_init_once(void) +{ + tap_result(1, "init_once # SKIP test not implemented"); +} + #elif USE_WINDOWS_THREADS #define THREAD_API "windows" #define WIN32_LEAN_AND_MEAN @@ -148,6 +158,61 @@ static void run_thread2(void) tap_bail_out("run_thread2 failed (%lu)", GetLastError()); } + +/* Also include init_once sanity test on Windows */ +#include "thread-w32.h" + +HANDLE init_semaphore; + +static void init_once_cb(void) +{ + tap_result(1, "init_once_cb start"); + + ReleaseSemaphore(init_semaphore, 2, NULL); + Sleep(1000); + + tap_diag("init_once_cb exit"); +} + +static DWORD WINAPI init_thread(LPVOID p) +{ + WaitForSingleObject(init_semaphore, INFINITE); + + tap_diag("init_thread start"); + init_once(); + tap_diag("init_thread exit"); + + return 0; +} + +static void check_init_once(void) +{ + HANDLE threads[3]; + DWORD rc; + int i; + + if (!(init_semaphore = CreateSemaphore(NULL, 0, 10, NULL))) + tap_bail_out("check_init_once failed (%lu)", GetLastError()); + + if (!(threads[0] = CreateThread(NULL, 0, init_thread, NULL, 0, &rc))) + tap_bail_out("check_init_once failed (%lu)", GetLastError()); + if (!(threads[1] = CreateThread(NULL, 0, init_thread, NULL, 0, &rc))) + tap_bail_out("check_init_once failed (%lu)", GetLastError()); + if (!(threads[2] = CreateThread(NULL, 0, init_thread, NULL, 0, &rc))) + tap_bail_out("check_init_once failed (%lu)", GetLastError()); + + ReleaseSemaphore(init_semaphore, 1, NULL); + WaitForMultipleObjects(3, threads, TRUE, INFINITE); + + for (i = 0; i < 3; i++) + CloseHandle(threads[i]); + CloseHandle(init_semaphore); + + for (i = 0; i < 10000000; i++) + init_once(); +} + + #else #undef THREAD_API int main(void) @@ -163,7 +228,7 @@ int main(void) const struct cdecl_error *err; tap_diag("using thread API: " THREAD_API); - tap_plan(9); + tap_plan(10); /* Simulate an error in the main thread. */ cdecl__errmsg(CDECL__ENOMEM); @@ -195,6 +260,12 @@ int main(void) * Main thread allocation should be the only one left. */ tap_result(test_live_allocations() == 1, "thread cleanup"); + + /* + * Basic init_once sanity check for Windows. + */ + check_init_once(); + tap_done(); } #endif