/* * Copyright © 2024 Nick Bowler * * Helpers for hosts using native Windows threading API. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The init_once implementation is adapted from the Pthreads-win32 library * implementation based on MCS (Mellor-Crummy Scott) locks, originally * distributed with the following copyright and permission notice: * * Pthreads-win32 - POSIX Threads Library for Win32 * Copyright(C) 1998 John E. Bossom * Copyright(C) 1999,2005 Pthreads-win32 contributors * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #define WIN32_LEAN_AND_MEAN #include static DWORD tls_key = TLS_OUT_OF_INDEXES; #define tls_key_valid (tls_key != TLS_OUT_OF_INDEXES) static void init_once_cb(void); static void *tls_get(void) { return TlsGetValue(tls_key); } static BOOL tls_set(void *p) { return TlsSetValue(tls_key, p); } /* * Synchronize with another thread's call to init_once_wait on the same * HANDLE object, which must be initialized to null. If init_once_wait * got there first, we receive a handle to the event object which can * then be signaled. 
*/ static void init_once_signal(HANDLE *ep) { /* * Note that INVALID_HANDLE_VALUE is distinct from a null pointer * and also distinct from any handle returned by CreateEvent. */ HANDLE e = INVALID_HANDLE_VALUE; if ((e = InterlockedCompareExchangePointer(ep, e, NULL))) SetEvent(e); } /* * Synchronize with another thread's call to init_once_signal on the same * HANDLE object. */ static void init_once_wait(HANDLE *ep) { #if _WIN64 if (!InterlockedAdd64((LONG64 *)ep, 0)) // load with memory barrier #else if (!InterlockedAdd((LONG *)ep, 0)) #endif { HANDLE e; e = CreateEvent(NULL, FALSE, FALSE, NULL); if (!InterlockedCompareExchangePointer(ep, e, NULL)) WaitForSingleObject(e, INFINITE); CloseHandle(e); } } /* * This implementation using an MCS lock variation requires only a single * pointer of shared global state initialized to null, and in the uncontended * case does not require allocation of any Windows resources whatsoever. * * These locks are described in the paper: * * "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors" * by John M. Mellor-Crummey and Michael L. Scott. * ACM Transactions on Computer Systems Volume 9, Issue 1 (Feb. 1991). * * The basic idea is that each thread has a local state, which includes a * pointer to the next waiting thread. The global state is a tail pointer to * the last waiting thread. The running thread holds the lock and also the * pointer to the first waiter. * * On acquire, atomically swap our fresh local state with the global tail * pointer, becoming the new last waiter. We receive a pointer to the previous * last waiter (or nothing, in the unlocked case). At this point it is safe * for a new thread to come along and update the tail pointer again. If * needed, we then update the last waiter to point to our thread, signal * that this is completed, and then wait to be signaled. 
* * On release, if the tail pointer points to us there are no waiters, and this * can be confirmed with an atomic compare and exchange to the done state, * which is equivalent to the original state except that a subsequent acquirer * will know that the initialization has been previously completed. * * If that didn't unlock the lock, we need to wait for the signal from the next * thread (which may not have updated our next pointer yet), then signal the * next thread to wake up. Eventually the queue will empty and the lock is * left in the done state, at which point a simple atomic load can determine * that nothing else needs to happen. */ static int init_once(void) { struct once_state * const init_done = (void *)-1; struct once_state { struct once_state *next; HANDLE ready_event, next_event; }; static void * volatile tail; struct once_state local = {0}; struct once_state *p; /* fast path for the normal (init completed) case. */ if (tail == init_done) return 1; if (!(p = InterlockedExchangePointer(&tail, &local))) { /* we're number one! */ tls_key = TlsAlloc(); init_once_cb(); } else if (p != init_done) { /* contended, wait for predecessor */ p->next = &local; init_once_signal(&p->next_event); init_once_wait(&local.ready_event); } p = InterlockedCompareExchangePointer(&tail, init_done, &local); if (p != &local) { /* contended, wait for successor */ init_once_wait(&local.next_event); init_once_signal(&local.next->ready_event); } return 1; } #if !TEST_W32_NO_DLLMAIN /* * On Windows, DLLs are notified of thread exit via the DllMain entry point. * This works in all versions. */ #if !DLL_EXPORT static #endif BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID p) { if (reason == DLL_THREAD_DETACH && tls_key != TLS_OUT_OF_INDEXES) free(TlsGetValue(tls_key)); return TRUE; } /* * We can achieve similar behaviour with static linking executables by * putting a pointer to the entry point in a special section. * * I believe this is supported beginning around Windows XP. 
*/
#if !DLL_EXPORT
/*
 * Place the hook pointer in the .CRT$XLF section: the pragma names the
 * section for compilers that honour data_seg, and the attribute names the
 * same section when __GNUC__ is set.
 */
#pragma data_seg(".CRT$XLF")
#if __GNUC__
__attribute__((section(".CRT$XLF")))
#endif
/*
 * NOTE(review): DllMain is cast to PIMAGE_TLS_CALLBACK here, which
 * presumably relies on the two function types being call-compatible on
 * the target ABI -- confirm.
 */
PIMAGE_TLS_CALLBACK cdecl__tls_hook = (PIMAGE_TLS_CALLBACK)DllMain;
#pragma data_seg() /* back to the default data section */
#endif /* !DLL_EXPORT */
#endif /* !TEST_W32_NO_DLLMAIN */