/*
* Copyright © 2024 Nick Bowler
*
* Helpers for hosts using native Windows threading API.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The init_once implementation is adapted from the Pthreads-win32 library
* implementation based on MCS (Mellor-Crummy Scott) locks, originally
* distributed with the following copyright and permission notice:
*
* Pthreads-win32 - POSIX Threads Library for Win32
* Copyright(C) 1998 John E. Bossom
* Copyright(C) 1999,2005 Pthreads-win32 contributors
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#define WIN32_LEAN_AND_MEAN
#include
static DWORD tls_key = TLS_OUT_OF_INDEXES;
#define tls_key_valid (tls_key != TLS_OUT_OF_INDEXES)
static void init_once_cb(void);
static void *tls_get(void)
{
return TlsGetValue(tls_key);
}
static BOOL tls_set(void *p)
{
return TlsSetValue(tls_key, p);
}
/*
* Synchronize with another thread's call to init_once_wait on the same
* HANDLE object, which must be initialized to null. If init_once_wait
* got there first, we receive a handle to the event object which can
* then be signaled.
*/
static void init_once_signal(HANDLE *ep)
{
/*
* Note that INVALID_HANDLE_VALUE is distinct from a null pointer
* and also distinct from any handle returned by CreateEvent.
*/
HANDLE e = INVALID_HANDLE_VALUE;
if ((e = InterlockedCompareExchangePointer(ep, e, NULL)))
SetEvent(e);
}
/*
* Synchronize with another thread's call to init_once_signal on the same
* HANDLE object.
*/
static void init_once_wait(HANDLE *ep)
{
#if _WIN64
if (!InterlockedAdd64((LONG64 *)ep, 0)) // load with memory barrier
#else
if (!InterlockedAdd((LONG *)ep, 0))
#endif
{
HANDLE e;
e = CreateEvent(NULL, FALSE, FALSE, NULL);
if (!InterlockedCompareExchangePointer(ep, e, NULL))
WaitForSingleObject(e, INFINITE);
CloseHandle(e);
}
}
/*
* This implementation using an MCS lock variation requires only a single
* pointer of shared global state initialized to null, and in the uncontended
* case does not require allocation of any Windows resources whatsoever.
*
* These locks are described in the paper:
*
* "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors"
* by John M. Mellor-Crummey and Michael L. Scott.
* ACM Transactions on Computer Systems Volume 9, Issue 1 (Feb. 1991).
*
* The basic idea is that each thread has a local state, which includes a
* pointer to the next waiting thread. The global state is a tail pointer to
* the last waiting thread. The running thread holds the lock and also the
* pointer to the first waiter.
*
* On acquire, atomically swap our fresh local state with the global tail
* pointer, becoming the new last waiter. We receive a pointer to the previous
* last waiter (or nothing, in the unlocked case). At this point it is safe
* for a new thread to come along and update the tail pointer again. If
* needed, we then update the last waiter to point to our thread, signal
* that this is completed, and then wait to be signaled.
*
* On release, if the tail pointer points to us there are no waiters, and this
* can be confirmed with an atomic compare and exchange to the done state,
* which is equivalent to the original state except that a subsequent acquirer
* will know that the initialization has been previously completed.
*
* If that didn't unlock the lock, we need to wait for the signal from the next
* thread (which may not have updated our next pointer yet), then signal the
* next thread to wake up. Eventually the queue will empty and the lock is
* left in the done state, at which point a simple atomic load can determine
* that nothing else needs to happen.
*/
static int init_once(void)
{
struct once_state * const init_done = (void *)-1;
struct once_state {
struct once_state *next;
HANDLE ready_event, next_event;
};
static void * volatile tail;
struct once_state local = {0};
struct once_state *p;
/* fast path for the normal (init completed) case. */
if (tail == init_done)
return 1;
if (!(p = InterlockedExchangePointer(&tail, &local))) {
/* we're number one! */
tls_key = TlsAlloc();
init_once_cb();
} else if (p != init_done) {
/* contended, wait for predecessor */
p->next = &local;
init_once_signal(&p->next_event);
init_once_wait(&local.ready_event);
}
p = InterlockedCompareExchangePointer(&tail, init_done, &local);
if (p != &local) {
/* contended, wait for successor */
init_once_wait(&local.next_event);
init_once_signal(&local.next->ready_event);
}
return 1;
}
#if !TEST_W32_NO_DLLMAIN
/*
* On Windows, DLLs are notified of thread exit via the DllMain entry point.
* This works in all versions.
*/
#if !DLL_EXPORT
static
#endif
BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID p)
{
if (reason == DLL_THREAD_DETACH && tls_key != TLS_OUT_OF_INDEXES)
free(TlsGetValue(tls_key));
return TRUE;
}
/*
* We can achieve similar behaviour with static linking executables by
* putting a pointer to the entry point in a special section.
*
* I believe this is supported beginning around Windows XP.
*/
#if !DLL_EXPORT
#pragma data_seg(".CRT$XLF")
#if __GNUC__
__attribute__((section(".CRT$XLF")))
#endif
PIMAGE_TLS_CALLBACK cdecl__tls_hook = (PIMAGE_TLS_CALLBACK)DllMain;
#pragma data_seg()
#endif
#endif