X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/c11f668ec9c5b2f0016cf6920a111c52e895b24a..bf336dc52970daa394d119b11284a1b5016a74c0:/src/thread-w32.h diff --git a/src/thread-w32.h b/src/thread-w32.h index fb3bdc4..181ebfc 100644 --- a/src/thread-w32.h +++ b/src/thread-w32.h @@ -8,6 +8,19 @@ * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * + * The init_once implementation is adapted from the Pthreads-win32 library + * implementation based on MCS (Mellor-Crummy Scott) locks, originally + * distributed with the following copyright and permission notice: + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -19,7 +32,6 @@ #define WIN32_LEAN_AND_MEAN #include -#include "windows-once.h" static DWORD tls_key; #define tls_key_valid (tls_key != TLS_OUT_OF_INDEXES) @@ -36,16 +48,113 @@ static BOOL tls_set(void *p) return TlsSetValue(tls_key, p); } -static void init_once_with_tls(void) +/* + * Synchronize with another thread's call to init_once_wait on the same + * HANDLE object, which must be initialized to null. If init_once_wait + * got there first, we receive a handle to the event object which can + * then be signaled. + */ +static void init_once_signal(HANDLE *ep) { - tls_key = TlsAlloc(); - init_once_cb(); + /* + * Note that INVALID_HANDLE_VALUE is distinct from a null pointer + * and also distinct from any handle returned by CreateEvent. + */ + HANDLE e = INVALID_HANDLE_VALUE; + + if ((e = InterlockedCompareExchangePointer(ep, e, NULL))) + SetEvent(e); } +/* + * Synchronize with another thread's call to init_once_signal on the same + * HANDLE object. + */ +static void init_once_wait(HANDLE *ep) +{ +#if _WIN64 + if (!InterlockedAdd64((LONG64 *)ep, 0)) // load with memory barrier +#else + if (!InterlockedAdd((LONG *)ep, 0)) +#endif + { + HANDLE e; + + e = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!InterlockedCompareExchangePointer(ep, e, NULL)) + WaitForSingleObject(e, INFINITE); + CloseHandle(e); + } +} + +/* + * This implementation using an MCS lock variation requires only a single + * pointer of shared global state initialized to null, and in the uncontended + * case does not require allocation of any Windows resources whatsoever. + * + * These locks are described in the paper: + * + * "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors" + * by John M. Mellor-Crummey and Michael L. Scott. + * ACM Transactions on Computer Systems Volume 9, Issue 1 (Feb. 1991). + * + * The basic idea is that each thread has a local state, which includes a + * pointer to the next waiting thread. The global state is a tail pointer to + * the last waiting thread. The running thread holds the lock and also the + * pointer to the first waiter. + * + * On acquire, atomically swap our fresh local state with the global tail + * pointer, becoming the new last waiter. We receive a pointer to the previous + * last waiter (or nothing, in the unlocked case). At this point it is safe + * for a new thread to come along and update the tail pointer again. If + * needed, we then update the last waiter to point to our thread, signal + * that this is completed, and then wait to be signaled. + * + * On release, if the tail pointer points to us there are no waiters, and this + * can be confirmed with an atomic compare and exchange to the done state, + * which is equivalent to the original state except that a subsequent acquirer + * will know that the initialization has been previously completed. + * + * If that didn't unlock the lock, we need to wait for the signal from the next + * thread (which may not have updated our next pointer yet), then signal the + * next thread to wake up. Eventually the queue will empty and the lock is + * left in the done state, at which point a simple atomic load can determine + * that nothing else needs to happen. + */ static int init_once(void) { - static glwthread_once_t ctrl = GLWTHREAD_ONCE_INIT; + struct once_state * const init_done = (void *)-1; + + struct once_state { + struct once_state *next; + HANDLE ready_event, next_event; + }; + + static void * volatile tail; + struct once_state local = {0}; + struct once_state *p; + + /* fast path for the normal (init completed) case. */ + if (tail == init_done) + return 1; + + if (!(p = InterlockedExchangePointer(&tail, &local))) { + /* we're number one! */ + tls_key = TlsAlloc(); + init_once_cb(); + } else if (p != init_done) { + /* contended, wait for predecessor */ + p->next = &local; + init_once_signal(&p->next_event); + init_once_wait(&local.ready_event); + } + + p = InterlockedCompareExchangePointer(&tail, init_done, &local); + if (p != &local) { + /* contended, wait for successor */ + init_once_wait(&local.next_event); + init_once_signal(&local.next->ready_event); + } - glwthread_once(&ctrl, init_once_with_tls); return 1; }