Skip to content

Commit 051cc01

Browse files
authored
fix(monitor): release Monitor._thread_lock after fork (#6148) (#6159)
### Description If `os.fork()` runs while another thread holds `Monitor._thread_lock`, the child process inherits the lock locked. The holding thread does not exist in the child, so the lock can never be released and `_ensure_running` deadlocks forever. Register an `os.register_at_fork(after_in_child=...)` hook that recreates `_thread_lock` and clears the cached thread / pid in the child, so the new process always starts with a clean monitor state regardless of what the parent was doing at fork time. Adds a regression test that acquires the lock, forks, and asserts the child sees a fresh, unheld lock. #### Issues * resolves: #6148 * resolves: [PY-2390](https://linear.app/getsentry/issue/PY-2390/monitor-thread-lock-leaked-into-forked-children-causing-child)
1 parent c24419e commit 051cc01

2 files changed

Lines changed: 70 additions & 4 deletions

File tree

sentry_sdk/monitor.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import time
3+
import weakref
34
from threading import Thread, Lock
45

56
import sentry_sdk
@@ -23,6 +24,9 @@ class Monitor:
2324

2425
name = "sentry.monitor"
2526

27+
_thread: "Optional[Thread]"
28+
_thread_for_pid: "Optional[int]"
29+
2630
def __init__(
2731
self, transport: "sentry_sdk.transport.Transport", interval: float = 10
2832
) -> None:
@@ -31,11 +35,33 @@ def __init__(
3135

3236
self._healthy = True
3337
self._downsample_factor: int = 0
34-
35-
self._thread: "Optional[Thread]" = None
36-
self._thread_lock = Lock()
37-
self._thread_for_pid: "Optional[int]" = None
3838
self._running = True
39+
self._reset_thread_state()
40+
41+
# See https://github.com/getsentry/sentry-python/issues/6148.
42+
# If os.fork() runs while another thread holds self._thread_lock,
43+
# the child inherits the lock locked but the holding thread does
44+
# not exist in the child, so the lock can never be released and
45+
# _ensure_running deadlocks forever. Reinitialise the lock and
46+
# cached thread/pid in the child so it starts clean regardless
47+
# of inherited state. We bind via a WeakMethod so the
48+
# permanently-registered fork handler does not pin this Monitor
49+
# (and its Transport): register_at_fork has no unregister API.
50+
# POSIX-only; Windows uses spawn.
51+
if hasattr(os, "register_at_fork"):
52+
weak_reset = weakref.WeakMethod(self._reset_thread_state)
53+
54+
def _reset_in_child() -> None:
55+
method = weak_reset()
56+
if method is not None:
57+
method()
58+
59+
os.register_at_fork(after_in_child=_reset_in_child)
60+
61+
def _reset_thread_state(self) -> None:
62+
self._thread = None
63+
self._thread_lock = Lock()
64+
self._thread_for_pid = None
3965

4066
def _ensure_running(self) -> None:
4167
"""

tests/test_monitor.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1+
import os
2+
import sys
13
from collections import Counter
24
from unittest import mock
35

6+
import pytest
7+
48
import sentry_sdk
59
from sentry_sdk.transport import Transport
610

@@ -99,3 +103,39 @@ def test_monitor_no_thread_on_shutdown_no_errors(sentry_init):
99103
assert monitor._thread is None
100104
monitor.run()
101105
assert monitor._thread is None
106+
107+
108+
@pytest.mark.skipif(
109+
sys.platform == "win32"
110+
or not hasattr(os, "fork")
111+
or not hasattr(os, "register_at_fork"),
112+
reason="requires POSIX fork and os.register_at_fork (Python 3.7+)",
113+
)
114+
def test_monitor_thread_lock_reset_in_child_after_fork(sentry_init):
115+
"""Regression test for #6148.
116+
117+
If os.fork() runs while Monitor._thread_lock is held, the child
118+
inherits the lock locked. The holding thread does not exist in the
119+
child, so the lock can never be released and _ensure_running
120+
deadlocks forever. The after-fork hook must replace the lock with
121+
a fresh one in the child.
122+
"""
123+
sentry_init(transport=HealthyTestTransport())
124+
monitor = sentry_sdk.get_client().monitor
125+
assert monitor is not None
126+
127+
original_lock = monitor._thread_lock
128+
original_lock.acquire()
129+
pid = os.fork()
130+
if pid == 0:
131+
# Child: was the lock object replaced and is the new one not
132+
# held? Without the fix, _thread_lock is `original_lock`
133+
# inherited locked, so `replaced` is False. blocking=False
134+
# guarantees the child can't hang on a regression.
135+
replaced = monitor._thread_lock is not original_lock
136+
unheld = monitor._thread_lock.acquire(blocking=False)
137+
os._exit(0 if replaced and unheld else 1)
138+
139+
original_lock.release()
140+
_, status = os.waitpid(pid, 0)
141+
assert os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0

0 commit comments

Comments
 (0)