2 * Copyright © 2024 Nick Bowler
4 * Helpers for hosts using native Windows threading API.
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * The init_once implementation is adapted from the Pthreads-win32 library
12 * implementation based on MCS (Mellor-Crummey Scott) locks, originally
13 * distributed with the following copyright and permission notice:
15 * Pthreads-win32 - POSIX Threads Library for Win32
16 * Copyright(C) 1998 John E. Bossom
17 * Copyright(C) 1999,2005 Pthreads-win32 contributors
19 * This library is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU Lesser General Public
21 * License as published by the Free Software Foundation; either
22 * version 2 of the License, or (at your option) any later version.
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
29 * You should have received a copy of the GNU General Public License
30 * along with this program. If not, see <https://www.gnu.org/licenses/>.
33 #define WIN32_LEAN_AND_MEAN
/*
 * Index of the thread-local storage slot accessed by tls_get/tls_set.
 * It starts out as TLS_OUT_OF_INDEXES; init_once later stores the result
 * of TlsAlloc here.
 */
36 static DWORD tls_key = TLS_OUT_OF_INDEXES;
/*
 * Nonzero when tls_key differs from TLS_OUT_OF_INDEXES, which is both its
 * initial value and (per the Windows API) what TlsAlloc returns on failure.
 */
37 #define tls_key_valid (tls_key != TLS_OUT_OF_INDEXES)
/* Forward declaration only; the definition is not shown in this part of the file. */
39 static void init_once_cb(void);
/*
 * Fetch the value stored in the tls_key slot via TlsGetValue, which
 * operates on the calling thread's copy of the slot.  The visible code
 * does not test tls_key_valid before the call.
 */
41 static void *tls_get(void)
43 return TlsGetValue(tls_key);
/*
 * Store p in the tls_key slot via TlsSetValue and hand back that call's
 * BOOL result unchanged (nonzero means success per the Windows API).
 *
 * p: value to associate with the calling thread.
 */
46 static BOOL tls_set(void *p)
48 return TlsSetValue(tls_key, p);
52 * Synchronize with another thread's call to init_once_wait on the same
53 * HANDLE object, which must be initialized to null. If init_once_wait
54 * got there first, we receive a handle to the event object which can be used to wake up the waiting thread.
/*
 * Signalling side of the handshake on *ep.
 *
 * ep: address of the HANDLE slot shared with the matching init_once_wait
 *     call.
 */
57 static void init_once_signal(HANDLE *ep)
60 * Note that INVALID_HANDLE_VALUE is distinct from a null pointer
61 * and also distinct from any handle returned by CreateEvent.
63 HANDLE e = INVALID_HANDLE_VALUE;
/*
 * Atomically store the INVALID_HANDLE_VALUE marker into *ep, but only if
 * *ep is still null.  The call returns what *ep held beforehand, so a
 * non-null result here is the event handle that init_once_wait placed in
 * the slot.
 * NOTE(review): the statement controlled by this `if` is not visible in
 * this extract -- confirm it against the full file.
 */
65 if ((e = InterlockedCompareExchangePointer(ep, e, NULL)))
70 * Synchronize with another thread's call to init_once_signal on the same HANDLE object, which must be initialized to null.
/*
 * Waiting side of the handshake on *ep.
 *
 * ep: address of the HANDLE slot shared with the matching init_once_signal
 *     call.
 */
73 static void init_once_wait(HANDLE *ep)
/*
 * The next two lines are alternative spellings of one check: an atomic add
 * of zero reads *ep (64-bit and 32-bit variants), and the condition holds
 * while the slot is still null.
 * NOTE(review): the preprocessor conditional that selects between them is
 * not visible in this extract -- presumably keyed on pointer width; confirm.
 */
76 if (!InterlockedAdd64((LONG64 *)ep, 0)) // load with memory barrier
78 if (!InterlockedAdd((LONG *)ep, 0))
/*
 * Unnamed auto-reset event, initially non-signaled (default security
 * attributes, bManualReset = FALSE, bInitialState = FALSE, no name).
 * NOTE(review): the returned handle is used on the next line without a
 * null check.
 */
83 e = CreateEvent(NULL, FALSE, FALSE, NULL);
/*
 * Publish the event in *ep only if the slot is still null.  A null return
 * means our handle was installed, so block until it is signaled.  A
 * non-null return means init_once_signal already stored its marker and
 * no wait is needed.
 * NOTE(review): any cleanup of `e` after this point is not visible in this
 * extract.
 */
84 if (!InterlockedCompareExchangePointer(ep, e, NULL))
85 WaitForSingleObject(e, INFINITE);
91 * This implementation using an MCS lock variation requires only a single
92 * pointer of shared global state initialized to null, and in the uncontended
93 * case does not require allocation of any Windows resources whatsoever.
95 * These locks are described in the paper:
97 * "Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors"
98 * by John M. Mellor-Crummey and Michael L. Scott.
99 * ACM Transactions on Computer Systems Volume 9, Issue 1 (Feb. 1991).
101 * The basic idea is that each thread has a local state, which includes a
102 * pointer to the next waiting thread. The global state is a tail pointer to
103 * the last waiting thread. The running thread holds the lock and also the
104 * pointer to the first waiter.
106 * On acquire, atomically swap our fresh local state with the global tail
107 * pointer, becoming the new last waiter. We receive a pointer to the previous
108 * last waiter (or nothing, in the unlocked case). At this point it is safe
109 * for a new thread to come along and update the tail pointer again. If
110 * needed, we then update the last waiter to point to our thread, signal
111 * that this is completed, and then wait to be signaled.
113 * On release, if the tail pointer points to us there are no waiters, and this
114 * can be confirmed with an atomic compare and exchange to the done state,
115 * which is equivalent to the original state except that a subsequent acquirer
116 * will know that the initialization has been previously completed.
118 * If that didn't unlock the lock, we need to wait for the signal from the next
119 * thread (which may not have updated our next pointer yet), then signal the
120 * next thread to wake up. Eventually the queue will empty and the lock is
121 * left in the done state, at which point a simple atomic load can determine
122 * that nothing else needs to happen.
/*
 * One-time initialization entry point built on the queue lock described
 * above; among the visible effects, the first thread through allocates
 * tls_key.
 * NOTE(review): the return statements are not visible in this extract, so
 * the meaning of the int result must be confirmed against the full file.
 */
124 static int init_once(void)
/*
 * Marker stored in `tail` once initialization has finished.  It is non-null
 * and presumably can never equal the address of a real once_state.
 */
126 struct once_state * const init_done = (void *)-1;
/*
 * Queue node fields: `next` links to the thread queued behind this one;
 * ready_event and next_event are the handshake slots passed to
 * init_once_signal / init_once_wait below.
 */
129 struct once_state *next;
130 HANDLE ready_event, next_event;
/*
 * The only shared state.  As used below: null means nobody has started,
 * init_done means finished, anything else is the address of the most
 * recently queued thread's node.
 */
133 static void * volatile tail;
/* Zero-initialized so that both handshake slots start out null. */
134 struct once_state local = {0};
135 struct once_state *p;
137 /* fast path for the normal (init completed) case. */
138 if (tail == init_done)
/* Atomically make our node the tail; p receives the previous tail. */
141 if (!(p = InterlockedExchangePointer(&tail, &local))) {
142 /* we're number one! */
143 tls_key = TlsAlloc();
145 } else if (p != init_done) {
146 /* contended, wait for predecessor */
/*
 * Announce ourselves through the predecessor's next_event slot, then block
 * until it signals our ready_event.
 * NOTE(review): the store that links this node into p->next is not visible
 * in this extract, although local.next is read further down -- confirm.
 */
148 init_once_signal(&p->next_event);
149 init_once_wait(&local.ready_event);
/*
 * If `tail` still holds our node, nobody queued behind us: swap in
 * init_done.  p receives the value `tail` held before the exchange.
 * NOTE(review): the test on p that guards the successor hand-off below is
 * not visible in this extract.
 */
152 p = InterlockedCompareExchangePointer(&tail, init_done, &local);
154 /* contended, wait for successor */
/*
 * Wait for the successor's signal on our next_event slot, then wake the
 * successor through its ready_event slot.
 */
155 init_once_wait(&local.next_event);
156 init_once_signal(&local.next->ready_event);
162 #if !TEST_W32_NO_DLLMAIN
165 * On Windows, DLLs are notified of thread exit via the DllMain entry point.
166 * This works in all versions.
/*
 * Notification handler: on DLL_THREAD_DETACH, free the value stored in the
 * tls_key slot, provided a key has been allocated.  hinst and p are not
 * used by the visible lines.
 * NOTE(review): the condition repeats the tls_key_valid test and the
 * argument repeats tls_get; the return statement is not visible in this
 * extract.
 */
171 BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID p)
173 if (reason == DLL_THREAD_DETACH && tls_key != TLS_OUT_OF_INDEXES)
174 free(TlsGetValue(tls_key));
180 * We can achieve similar behaviour with static linking executables by
181 * putting a pointer to the entry point in a special section.
183 * I believe this is supported beginning around Windows XP.
/*
 * Place a pointer to DllMain (cast to the TLS callback type) in the
 * .CRT$XLF section.  The pragma and the attribute are two compiler-specific
 * ways of naming that section.
 * NOTE(review): the conditional compilation that selects between them is
 * not visible in this extract -- confirm against the full file.
 */
186 #pragma data_seg(".CRT$XLF")
188 __attribute__((section(".CRT$XLF")))
190 PIMAGE_TLS_CALLBACK cdecl__tls_hook = (PIMAGE_TLS_CALLBACK)DllMain;