cygthread: suspend thread before terminating.
This addresses an extremely difficult to debug deadlock when running under emulation on ARM64. A relatively easy way to trigger this bug is to call `fork()`, then within the child process immediately call another `fork()` and then `exit()` the intermediate process. It would seem that there is a "code emulation" lock on the wait thread at this stage, and if the thread is terminated too early, that lock still exists albeit without a thread, and nothing moves forward. It seems that a `SuspendThread()` combined with a `GetThreadContext()` (to force the thread to _actually_ be suspended, for more details see https://devblogs.microsoft.com/oldnewthing/20150205-00/?p=44743) makes sure the thread is "booted" from emulation before it is suspended. Hopefully this means it won't be holding any locks or otherwise leave emulation in a bad state when the thread is terminated. Also, attempt to use `CancelSynchonousIo()` (as seen in `flock.cc`) to avoid the need for `TerminateThread()` altogether. This doesn't always work, however, so was not a complete fix for the deadlock issue. Addresses: https://cygwin.com/pipermail/cygwin-developers/2024-May/012694.html Signed-off-by: Jeremy Drake <cygwin@jdrake.com>
This commit is contained in:
parent
b2672642c1
commit
b091b47b9e
|
@ -302,6 +302,20 @@ cygthread::terminate_thread ()
|
|||
if (!inuse)
|
||||
goto force_notterminated;
|
||||
|
||||
if (_my_tls._ctinfo != this)
|
||||
{
|
||||
CONTEXT context;
|
||||
context.ContextFlags = CONTEXT_CONTROL;
|
||||
/* SuspendThread makes sure a thread is "booted" from emulation before
|
||||
it is suspended. As such, the emulator hopefully won't be in a bad
|
||||
state (aka, holding any locks) when the thread is terminated. */
|
||||
SuspendThread (h);
|
||||
/* We need to call GetThreadContext, even though we don't care about the
|
||||
context, because SuspendThread is asynchronous and GetThreadContext
|
||||
will make sure the thread is *really* suspended before returning */
|
||||
GetThreadContext (h, &context);
|
||||
}
|
||||
|
||||
TerminateThread (h, 0);
|
||||
WaitForSingleObject (h, INFINITE);
|
||||
CloseHandle (h);
|
||||
|
|
|
@ -1252,12 +1252,16 @@ proc_waiter (void *arg)
|
|||
|
||||
for (;;)
|
||||
{
|
||||
DWORD nb;
|
||||
DWORD nb, err;
|
||||
char buf = '\0';
|
||||
|
||||
if (!ReadFile (vchild.rd_proc_pipe, &buf, 1, &nb, NULL)
|
||||
&& GetLastError () != ERROR_BROKEN_PIPE)
|
||||
&& (err = GetLastError ()) != ERROR_BROKEN_PIPE)
|
||||
{
|
||||
/* ERROR_OPERATION_ABORTED is expected due to the possibility that
|
||||
CancelSynchronousIo interruped the ReadFile call, so don't output
|
||||
that error */
|
||||
if (err != ERROR_OPERATION_ABORTED)
|
||||
system_printf ("error on read of child wait pipe %p, %E", vchild.rd_proc_pipe);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -409,7 +409,11 @@ proc_terminate ()
|
|||
to 1 iff it is a Cygwin process. */
|
||||
if (!have_execed || !have_execed_cygwin)
|
||||
chld_procs[i]->ppid = 1;
|
||||
if (chld_procs[i].wait_thread)
|
||||
/* Attempt to exit the wait_thread cleanly via CancelSynchronousIo
|
||||
before falling back to the (explicitly dangerous) cross-thread
|
||||
termination */
|
||||
if (chld_procs[i].wait_thread
|
||||
&& !CancelSynchronousIo (chld_procs[i].wait_thread->thread_handle ()))
|
||||
chld_procs[i].wait_thread->terminate_thread ();
|
||||
/* Release memory associated with this process unless it is 'myself'.
|
||||
'myself' is only in the chld_procs table when we've execed. We
|
||||
|
@ -1174,7 +1178,11 @@ remove_proc (int ci)
|
|||
{
|
||||
if (have_execed)
|
||||
{
|
||||
if (_my_tls._ctinfo != chld_procs[ci].wait_thread)
|
||||
/* Attempt to exit the wait_thread cleanly via CancelSynchronousIo
|
||||
before falling back to the (explicitly dangerous) cross-thread
|
||||
termination */
|
||||
if (_my_tls._ctinfo != chld_procs[ci].wait_thread
|
||||
&& !CancelSynchronousIo (chld_procs[ci].wait_thread->thread_handle ()))
|
||||
chld_procs[ci].wait_thread->terminate_thread ();
|
||||
}
|
||||
else if (chld_procs[ci] && chld_procs[ci]->exists ())
|
||||
|
|
Loading…
Reference in New Issue