X-Git-Url: http://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/1b96e9580714d7a677d10a8e286b3f8a2bddc6f5..HEAD:/src/thread-w32.h?ds=sidebyside diff --git a/src/thread-w32.h b/src/thread-w32.h index 44282d8..393f0de 100644 --- a/src/thread-w32.h +++ b/src/thread-w32.h @@ -8,6 +8,19 @@ * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * + * The init_once implementation is adapted from the Pthreads-win32 library + * implementation based on MCS (Mellor-Crummy Scott) locks, originally + * distributed with the following copyright and permission notice: + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -20,13 +33,10 @@ #define WIN32_LEAN_AND_MEAN #include -static DWORD tls_key; +static DWORD tls_key = TLS_OUT_OF_INDEXES; #define tls_key_valid (tls_key != TLS_OUT_OF_INDEXES) -static void tls_key_init(void) -{ - tls_key = TlsAlloc(); -} +static void init_once_cb(void); static void *tls_get(void) { @@ -37,3 +47,148 @@ static BOOL tls_set(void *p) { return TlsSetValue(tls_key, p); } + +/* + * Synchronize with another thread's call to init_once_wait on the same + * HANDLE object, which must be initialized to null. If init_once_wait + * got there first, we receive a handle to the event object which can + * then be signaled. + */ +static void init_once_signal(HANDLE *ep) +{ + /* + * Note that INVALID_HANDLE_VALUE is distinct from a null pointer + * and also distinct from any handle returned by CreateEvent. + */ + HANDLE e = INVALID_HANDLE_VALUE; + + if ((e = InterlockedCompareExchangePointer(ep, e, NULL))) + SetEvent(e); +} + +/* + * Synchronize with another thread's call to init_once_signal on the same + * HANDLE object. + */ +static void init_once_wait(HANDLE *ep) +{ +#if _WIN64 + if (!InterlockedAdd64((LONG64 *)ep, 0)) // load with memory barrier +#else + if (!InterlockedAdd((LONG *)ep, 0)) +#endif + { + HANDLE e; + + e = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!InterlockedCompareExchangePointer(ep, e, NULL)) + WaitForSingleObject(e, INFINITE); + CloseHandle(e); + } +} + +/* + * This implementation using an MCS lock variation requires only a single + * pointer of shared global state initialized to null, and in the uncontended + * case does not require allocation of any Windows resources whatsoever. + * + * These locks are described in the paper: + * + * "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors" + * by John M. Mellor-Crummey and Michael L. Scott. + * ACM Transactions on Computer Systems Volume 9, Issue 1 (Feb. 1991). + * + * The basic idea is that each thread has a local state, which includes a + * pointer to the next waiting thread. The global state is a tail pointer to + * the last waiting thread. The running thread holds the lock and also the + * pointer to the first waiter. + * + * On acquire, atomically swap our fresh local state with the global tail + * pointer, becoming the new last waiter. We receive a pointer to the previous + * last waiter (or nothing, in the unlocked case). At this point it is safe + * for a new thread to come along and update the tail pointer again. If + * needed, we then update the last waiter to point to our thread, signal + * that this is completed, and then wait to be signaled. + * + * On release, if the tail pointer points to us there are no waiters, and this + * can be confirmed with an atomic compare and exchange to the done state, + * which is equivalent to the original state except that a subsequent acquirer + * will know that the initialization has been previously completed. + * + * If that didn't unlock the lock, we need to wait for the signal from the next + * thread (which may not have updated our next pointer yet), then signal the + * next thread to wake up. Eventually the queue will empty and the lock is + * left in the done state, at which point a simple atomic load can determine + * that nothing else needs to happen. + */ +static int init_once(void) +{ + struct once_state * const init_done = (void *)-1; + + struct once_state { + struct once_state *next; + HANDLE ready_event, next_event; + }; + + static void * volatile tail; + struct once_state local = {0}; + struct once_state *p; + + /* fast path for the normal (init completed) case. */ + if (tail == init_done) + return 1; + + if (!(p = InterlockedExchangePointer(&tail, &local))) { + /* we're number one! */ + tls_key = TlsAlloc(); + init_once_cb(); + } else if (p != init_done) { + /* contended, wait for predecessor */ + p->next = &local; + init_once_signal(&p->next_event); + init_once_wait(&local.ready_event); + } + + p = InterlockedCompareExchangePointer(&tail, init_done, &local); + if (p != &local) { + /* contended, wait for successor */ + init_once_wait(&local.next_event); + init_once_signal(&local.next->ready_event); + } + + return 1; +} + +#if !TEST_W32_NO_DLLMAIN + +/* + * On Windows, DLLs are notified of thread exit via the DllMain entry point. + * This works in all versions. + */ +#if !DLL_EXPORT +static +#endif +BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID p) +{ + if (reason == DLL_THREAD_DETACH && tls_key != TLS_OUT_OF_INDEXES) + free(TlsGetValue(tls_key)); + + return TRUE; +} + +/* + * We can achieve similar behaviour with static linking executables by + * putting a pointer to the entry point in a special section. + * + * I believe this is supported beginning around Windows XP. + */ +#if !DLL_EXPORT +#pragma data_seg(".CRT$XLF") +#if __GNUC__ +__attribute__((section(".CRT$XLF"))) +#endif +PIMAGE_TLS_CALLBACK cdecl__tls_hook = (PIMAGE_TLS_CALLBACK)DllMain; +#pragma data_seg() +#endif + +#endif