Python’s faulthandler module dumps Python tracebacks on low-level faults (segfaults, etc.) and on demand — essential for debugging crashes in C extensions. import faulthandler. Enable: faulthandler.enable(file=sys.stderr, all_threads=True) — installs handlers for SIGSEGV, SIGFPE, SIGABRT, SIGBUS, SIGILL; all_threads=True includes all thread stacks. Disable: faulthandler.disable(). Check: faulthandler.is_enabled() → bool. Dump now: faulthandler.dump_traceback(file=sys.stderr, all_threads=True) — prints current stacks without waiting for a crash. Timed dump: faulthandler.dump_traceback_later(timeout, repeat=False, file=sys.stderr) — fires after timeout seconds; repeat=True re-schedules on each fire (watchdog pattern). Cancel: faulthandler.cancel_dump_traceback_later(). Custom signal: faulthandler.register(signum, file=sys.stderr, all_threads=True, chain=False) — dump on any POSIX signal (e.g. SIGUSR1); chain=True also calls the previous handler. Unregister: faulthandler.unregister(signum). File arg: accepts a file object (open(...)) or a file descriptor (int); the file object must expose a real fileno() — an in-memory io.StringIO is rejected — and it must stay open until the handler is disabled or the timer is cancelled, because faulthandler writes directly to the underlying descriptor. Also activatable via PYTHONFAULTHANDLER=1 env variable or python -X faulthandler. Claude Code generates crash reporters, deadlock detectors, timeout watchdogs, and production process monitors.
CLAUDE.md for faulthandler
## faulthandler Stack
- Stdlib: import faulthandler, signal, sys
- Enable: faulthandler.enable() # call at process startup
- Dump: faulthandler.dump_traceback() # immediate stack dump
- Watchdog: faulthandler.dump_traceback_later(30, repeat=True)
- Signal: faulthandler.register(signal.SIGUSR1) # dump on SIGUSR1
- Env: PYTHONFAULTHANDLER=1 OR python -X faulthandler
- Note: enable() is idempotent and cheap; always enable in production
faulthandler Crash Debug Pipeline
# app/faulthandlerutil.py — enable, dump, watchdog, crash reporter, deadlock
from __future__ import annotations
import contextlib
import faulthandler
import io
import os
import signal
import sys
import threading
import time
from dataclasses import dataclass, field
from pathlib import Path
# ─────────────────────────────────────────────────────────────────────────────
# 1. Basic enable / status helpers
# ─────────────────────────────────────────────────────────────────────────────
def enable_faulthandler(
    file=None,
    all_threads: bool = True,
) -> None:
    """
    Turn on the process-wide fault handler unless it is already active.

    When the handler is already enabled this is a no-op, so ``file`` and
    ``all_threads`` are ignored in that case.

    Args:
        file: Destination for fault tracebacks. Any falsy value (including
            the default ``None``) selects ``sys.stderr``.
        all_threads: Dump the stacks of every thread rather than only the
            faulting one.

    Example:
        enable_faulthandler()
    """
    # Guard clause: leave an existing configuration untouched.
    if faulthandler.is_enabled():
        return
    destination = file or sys.stderr
    faulthandler.enable(file=destination, all_threads=all_threads)
def faulthandler_status() -> dict:
    """
    Report whether the fault handler is active and which platform is running.

    Returns:
        A dict with exactly two keys: ``"enabled"`` (the result of
        ``faulthandler.is_enabled()``) and ``"platform"`` (``sys.platform``).

    Example:
        print(faulthandler_status())
    """
    is_active = faulthandler.is_enabled()
    return dict(enabled=is_active, platform=sys.platform)
def dump_all_threads(file=None) -> None:
    """
    Write the current traceback of every thread right now.

    Args:
        file: Where to write the dump. Any falsy value (including the
            default ``None``) selects ``sys.stderr``.

    Example:
        dump_all_threads()
    """
    destination = sys.stderr if not file else file
    faulthandler.dump_traceback(file=destination, all_threads=True)
def capture_traceback_string(all_threads: bool = True) -> str:
    """
    Capture faulthandler.dump_traceback output as a string.

    faulthandler writes directly to an OS-level file descriptor, so an
    in-memory buffer such as ``io.StringIO`` cannot be used (it has no
    ``fileno()``). The dump is therefore written to an anonymous temporary
    file and read back.

    Args:
        all_threads: Include the stacks of every thread rather than only the
            calling thread.

    Returns:
        The traceback text produced by ``faulthandler.dump_traceback``.

    Example:
        tb_str = capture_traceback_string()
        print(tb_str[:300])
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import tempfile

    # buffering=0: faulthandler bypasses Python-level buffers, so read the
    # raw file to see exactly what was written to the descriptor.
    with tempfile.TemporaryFile(mode="w+b", buffering=0) as tmp:
        faulthandler.dump_traceback(file=tmp, all_threads=all_threads)
        tmp.seek(0)
        raw = tmp.read()
    return raw.decode("utf-8", errors="replace")
# ─────────────────────────────────────────────────────────────────────────────
# 2. File-backed crash log
# ─────────────────────────────────────────────────────────────────────────────
class CrashLog:
    """
    Route fault-handler output to a file so crash tracebacks survive process death.

    The log file is kept open for as long as the handler is enabled, because
    faulthandler writes to the underlying file descriptor; it is closed only
    after the handler has been disabled.

    Note: the fault handler is process-wide. ``enable()`` points it at this
    log and ``disable()`` switches it off entirely, regardless of how it was
    configured beforehand.

    Example:
        crash_log = CrashLog("/var/log/myapp/crashes.txt")
        crash_log.enable()
        # ... run application ...
        crash_log.disable()
    """

    def __init__(self, path: "str | Path", all_threads: bool = True) -> None:
        """
        Args:
            path: Location of the crash log; opened in append mode by enable().
            all_threads: Dump every thread's stack on a fault.
        """
        self._path = Path(path)
        self._all_threads = all_threads
        self._file = None  # open log file while enabled, else None

    @staticmethod
    def _close_quietly(handle) -> None:
        """Close *handle* if it is set, ignoring OS-level close errors."""
        if handle is not None:
            with contextlib.suppress(OSError):
                handle.close()

    def enable(self) -> None:
        """
        Open the log file and enable the fault handler writing to it.

        Safe to call repeatedly: the handler is re-pointed at a freshly
        opened file and the previously opened file is closed instead of
        being leaked.
        """
        self._path.parent.mkdir(parents=True, exist_ok=True)
        new_file = self._path.open("a")
        try:
            faulthandler.enable(file=new_file, all_threads=self._all_threads)
        except BaseException:
            # Do not leak the descriptor if the handler could not be installed.
            new_file.close()
            raise
        # Swap only after the handler uses the new file, then drop the old one.
        old_file, self._file = self._file, new_file
        self._close_quietly(old_file)

    def disable(self) -> None:
        """Disable the fault handler, then close the log file."""
        # Order matters: the handler must stop using the descriptor first.
        faulthandler.disable()
        self._close_quietly(self._file)
        self._file = None

    def __enter__(self) -> "CrashLog":
        self.enable()
        return self

    def __exit__(self, *_: object) -> None:
        self.disable()
# ─────────────────────────────────────────────────────────────────────────────
# 3. Watchdog timer
# ─────────────────────────────────────────────────────────────────────────────
class FaultWatchdog:
    """
    Arm a dump_traceback_later watchdog (repeating by default).

    If the process becomes unresponsive (e.g. deadlock), the watchdog
    dumps thread stacks to the log file, or to stderr when no log path
    was given.

    Note: faulthandler exposes a single process-wide timer, so arming one
    watchdog replaces any timer armed earlier and disarm() cancels whichever
    timer is currently active.

    Example:
        with FaultWatchdog(timeout=30, log_path="/tmp/watchdog.log"):
            do_long_operation()
        # watchdog disarmed automatically on exit
    """

    def __init__(
        self,
        timeout: float,
        log_path: "str | Path | None" = None,
        repeat: bool = True,
    ) -> None:
        """
        Args:
            timeout: Seconds before the dump fires; passed unchanged to
                ``faulthandler.dump_traceback_later``.
            log_path: File to append dumps to; a falsy value selects stderr.
            repeat: Re-schedule the dump after each firing.
        """
        self._timeout = timeout
        self._log_path = Path(log_path) if log_path else None
        self._repeat = repeat
        self._file = None  # open log file while armed with a log path

    @staticmethod
    def _close_quietly(handle) -> None:
        """Close *handle* if it is set, ignoring OS-level close errors."""
        if handle is not None:
            try:
                handle.close()
            except OSError:
                pass

    def arm(self) -> None:
        """
        Start the watchdog timer.

        Raises whatever ``faulthandler.dump_traceback_later`` raises (for
        example ``ValueError`` for a non-positive timeout); in that case the
        freshly opened log file is closed rather than leaked.
        """
        new_file = None
        if self._log_path:
            self._log_path.parent.mkdir(parents=True, exist_ok=True)
            new_file = self._log_path.open("a")
        target = new_file if new_file is not None else sys.stderr
        try:
            faulthandler.dump_traceback_later(
                self._timeout,
                repeat=self._repeat,
                file=target,
            )
        except BaseException:
            self._close_quietly(new_file)
            raise
        # Re-arming: the timer now uses the new file, so the old one can go.
        old_file, self._file = self._file, new_file
        self._close_quietly(old_file)

    def disarm(self) -> None:
        """Cancel the watchdog timer, then close the log file if one is open."""
        # Cancel first so the timer never writes to a closed descriptor.
        faulthandler.cancel_dump_traceback_later()
        self._close_quietly(self._file)
        self._file = None

    def __enter__(self) -> "FaultWatchdog":
        self.arm()
        return self

    def __exit__(self, *_: object) -> None:
        self.disarm()
# ─────────────────────────────────────────────────────────────────────────────
# 4. Signal-triggered traceback dump
# ─────────────────────────────────────────────────────────────────────────────
# Signals registered through register_dump_signal(), each listed once, in
# first-registration order. Consumed by unregister_all_dump_signals().
_registered_signals: list[int] = []


def register_dump_signal(
    signum: int = signal.SIGUSR1 if hasattr(signal, "SIGUSR1") else 0,
    file=None,
    all_threads: bool = True,
) -> bool:
    """
    Register a signal that triggers a traceback dump (default SIGUSR1).

    Send the signal with: kill -USR1 <pid>

    Registering the same signal again re-installs the handler with the new
    arguments but records the signal only once, so the bookkeeping list does
    not grow with repeated calls.

    Args:
        signum: Signal number to hook. ``0`` (the default on platforms
            without SIGUSR1) means "nothing to register".
        file: Destination for the dump; any falsy value selects
            ``sys.stderr``.
        all_threads: Dump the stacks of every thread.

    Returns:
        True if registration succeeded; False when ``signum`` is 0 or this
        platform's faulthandler has no ``register`` function.

    Example:
        register_dump_signal()
        print(f"Send SIGUSR1 to PID {os.getpid()} for a stack dump")
    """
    if signum == 0 or not hasattr(faulthandler, "register"):
        return False
    faulthandler.register(
        signum,
        file=file or sys.stderr,
        all_threads=all_threads,
        chain=False,
    )
    # Record each signal once; duplicates would only cause redundant
    # unregister calls later.
    if signum not in _registered_signals:
        _registered_signals.append(signum)
    return True


def unregister_all_dump_signals() -> None:
    """Unregister all signals previously registered via register_dump_signal()."""
    for sig in _registered_signals:
        faulthandler.unregister(sig)
    _registered_signals.clear()
# ─────────────────────────────────────────────────────────────────────────────
# 5. Deadlock detector
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class DeadlockDetector:
    """
    Run a callable in a worker thread with a watchdog that dumps stacks if it
    takes longer than timeout seconds, helping diagnose deadlocks.

    Example:
        def tricky():
            lock = threading.Lock()
            lock.acquire()
            lock.acquire() # deadlock
        detector = DeadlockDetector(timeout=2.0, log_path="/tmp/deadlock.log")
        try:
            detector.run(tricky)
        except TimeoutError:
            print("deadlock detected — check /tmp/deadlock.log")
    """

    # Seconds the callable may run before the watchdog dumps stacks.
    timeout: float
    # Watchdog log file; None is passed through to FaultWatchdog unchanged.
    log_path: "str | Path | None" = None

    def run(self, fn, *args, **kwargs) -> object:
        """
        Run fn(*args, **kwargs) in a daemon worker thread under a watchdog.

        The calling thread waits up to ``timeout + 0.5`` seconds, so a
        watchdog armed for ``timeout`` seconds has had time to fire before
        the timeout is reported.

        Returns:
            Whatever fn returned.

        Raises:
            TimeoutError: fn had not returned when the wait expired. The
                worker is a daemon thread and is left running.
            Exception: any ``Exception`` raised by fn is re-raised in the
                calling thread.
        """
        result_holder: list = []
        exc_holder: list = []
        done = threading.Event()

        def target():
            try:
                result_holder.append(fn(*args, **kwargs))
            except Exception as e:
                exc_holder.append(e)
            finally:
                done.set()

        worker = threading.Thread(target=target, daemon=True)
        with FaultWatchdog(self.timeout, log_path=self.log_path, repeat=False):
            worker.start()
            finished = done.wait(timeout=self.timeout + 0.5)
            if not finished:
                # Not every callable has __name__ (e.g. functools.partial);
                # fall back to repr so the TimeoutError is never masked by
                # an AttributeError.
                if hasattr(fn, "__name__"):
                    label = repr(fn.__name__)
                else:
                    label = repr(fn)
                raise TimeoutError(
                    f"Function {label} did not complete within "
                    f"{self.timeout}s — traceback dumped"
                )
        if exc_holder:
            raise exc_holder[0]
        return result_holder[0] if result_holder else None
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test walkthrough of every helper in this module.
    import tempfile

    print("=== faulthandler demo ===")

    # Status before and after switching the handler on.
    print("\n--- status before enable ---")
    status_before = faulthandler_status()
    print(f" {status_before}")
    enable_faulthandler()
    print("\n--- status after enable ---")
    status_after = faulthandler_status()
    print(f" {status_after}")

    # Grab the current stacks as text and show the first few lines.
    print("\n--- capture_traceback_string ---")
    captured = capture_traceback_string(all_threads=True)
    captured_lines = captured.strip().splitlines()
    print(f" captured {len(captured_lines)} lines")
    for shown in captured_lines[:4]:
        print(f" {shown}")

    # CrashLog: write a manual dump into the log while it is active.
    print("\n--- CrashLog ---")
    with tempfile.TemporaryDirectory() as crash_dir:
        crash_path = Path(crash_dir) / "crash.log"
        with CrashLog(crash_path) as crash_log:
            faulthandler.dump_traceback(file=crash_log._file or sys.stderr)
        if crash_path.exists():
            crash_text = crash_path.read_text()
            print(f" crash.log size: {len(crash_text)} bytes")
            print(f" first line: {crash_text.splitlines()[0]!r}")

    # FaultWatchdog: sleep past the timeout so the one-shot dump fires.
    print("\n--- FaultWatchdog (short timeout) ---")
    with tempfile.TemporaryDirectory() as watchdog_dir:
        watchdog_path = Path(watchdog_dir) / "watchdog.log"
        began = time.monotonic()
        with FaultWatchdog(timeout=0.3, log_path=watchdog_path, repeat=False):
            time.sleep(0.5)
        duration = time.monotonic() - began
        print(f" elapsed: {duration:.2f}s")
        if watchdog_path.exists():
            print(f" watchdog log size: {watchdog_path.stat().st_size} bytes")

    # Signal-triggered dump, only where SIGUSR1 exists.
    print("\n--- register_dump_signal ---")
    if not hasattr(signal, "SIGUSR1"):
        print(" SIGUSR1 not available on this platform")
    else:
        registered = register_dump_signal(signal.SIGUSR1)
        print(f" SIGUSR1 registered: {registered}")
        print(f" send 'kill -USR1 {os.getpid()}' to dump stacks")
        unregister_all_dump_signals()
        print(f" unregistered all dump signals")

    # DeadlockDetector: one call that finishes, one that times out.
    print("\n--- DeadlockDetector ---")
    with tempfile.TemporaryDirectory() as detector_dir:
        detector = DeadlockDetector(
            timeout=0.2,
            log_path=Path(detector_dir) / "dd.log",
        )
        fast_result = detector.run(lambda: 42)
        print(f" fast function result: {fast_result}")
        try:
            detector.run(lambda: time.sleep(5))
        except TimeoutError:
            print(f" slow function: TimeoutError raised (expected)")

    print("\n=== done ===")
For the signal module alternative — signal.signal(signal.SIGUSR1, handler) lets you register a Python-level callback on POSIX signals — use signal when you need a full Python callback that accesses application state on the signal; use faulthandler.register(signum) when you only need a stack dump (it fires at C level even if the GIL is held, making it safe for diagnosing deadlocks where Python-level signal handlers would never run). For the traceback.print_stack / threading.enumerate alternative — walking threading.enumerate() and calling traceback.extract_stack(sys._current_frames()[t.ident]) produces thread stacks from Python — use this approach when you want to process stack frames programmatically (filter, format, log to JSON); use faulthandler.dump_traceback() when you need a low-level signal-safe dump that bypasses the GIL and works even during interpreter-level crashes. The Claude Skills 360 bundle includes faulthandler skill sets covering enable_faulthandler()/faulthandler_status()/dump_all_threads()/capture_traceback_string() core helpers, CrashLog file-backed crash log context manager, FaultWatchdog repeating dump_traceback_later context manager, register_dump_signal()/unregister_all_dump_signals() SIGUSR1 helpers, and DeadlockDetector.run() with timeout and watchdog integration. Start with the free tier to try crash debug patterns and faulthandler pipeline code generation.