pyinstrument is a sampling call-stack profiler. pip install pyinstrument. CLI: python -m pyinstrument script.py. Programmatic: from pyinstrument import Profiler; p = Profiler(); p.start(); ...; p.stop(); p.print(). Context manager: with Profiler() as p: ...; p.print(). HTML: p.output_html() → HTML string. p.open_in_browser() — opens interactive flamegraph. JSON: p.output(renderer="json"). Text: p.print(unicode=True, color=True, show_all=True). Interval: Profiler(interval=0.001) — 1ms sampling (default 0.001). Async: Profiler(async_mode="enabled") — awaits between samples. with Profiler(async_mode="enabled") as p: await async_fn(). Timeline: Profiler(timeline=True). p.last_session — save/restore session. p.output(renderer=pyinstrument.renderers.SpeedscopeRenderer()). Filter: p.print(show_all=False) — hides library frames. TimeoutError: use p.stop() in finally. Django: pyinstrument.middleware.ProfilerMiddleware. FastAPI: wrap request handler. pytest: pip install pytest-pyinstrument; pytest --profile --profile-svg. @pytest.fixture with Profiler. p.session → serialize. Claude Code generates pyinstrument context managers, async profilers, FastAPI middleware, and CI performance benchmarks.
CLAUDE.md for pyinstrument
## pyinstrument Stack
- Version: pyinstrument >= 4.6 | pip install pyinstrument
- Profile: with Profiler() as p: ...; p.print()
- Async: with Profiler(async_mode="enabled") as p: await fn()
- HTML: p.output_html() → str | p.open_in_browser()
- JSON: p.output(renderer="json") for structured parse
- CLI: python -m pyinstrument --html -o profile.html script.py
## pyinstrument Profiling Pipeline
# app/profiler.py — pyinstrument Profiler, async, middleware, session, and pytest helpers
from __future__ import annotations
import asyncio
import json
import time
from contextlib import asynccontextmanager, contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, TypeVar
from pyinstrument import Profiler
from pyinstrument.renderers import HTMLRenderer, JSONRenderer, SpeedscopeRenderer
T = TypeVar("T")
# ─────────────────────────────────────────────────────────────────────────────
# 1. Sync profiler helpers
# ─────────────────────────────────────────────────────────────────────────────
@contextmanager
def profile(
    interval: float = 0.001,
    show_all: bool = False,
    print_output: bool = True,
):
    """Profile the enclosed block with a sampling Profiler.

    Usage:
        with profile():
            expensive_function()

    Args:
        interval: sampling period in seconds (smaller = finer detail,
            more overhead).
        show_all: when True, include library frames in the report.
        print_output: emit a colored text report when the block exits.

    Yields the active Profiler; it is always stopped on exit, even if
    the block raises.
    """
    profiler = Profiler(interval=interval)
    profiler.start()
    try:
        yield profiler
    finally:
        profiler.stop()
        if print_output:
            profiler.print(show_all=show_all, unicode=True, color=True)
def profile_call(
    fn: Callable,
    *args: Any,
    interval: float = 0.001,
    **kwargs: Any,
) -> tuple[Any, Profiler]:
    """
    Profile a single function call.

    Returns (result, profiler) — call profiler.print() or
    profiler.output_html() to inspect the results.

    Args:
        fn: callable to profile.
        *args, **kwargs: forwarded to fn.
        interval: sampling period in seconds.

    Example:
        result, p = profile_call(process_data, df, chunk_size=1000)
        print(p.output_html())
    """
    p = Profiler(interval=interval)
    p.start()
    try:
        result = fn(*args, **kwargs)
    finally:
        # Always stop sampling — the original left the profiler running
        # in the caller's process whenever fn raised.
        p.stop()
    return result, p
def save_html(profiler: Profiler, path: str | Path, timeline: bool = False) -> Path:
    """Write the profiler's interactive HTML flamegraph to *path* and return it."""
    dest = Path(path)
    html = profiler.output_html(timeline=timeline)
    dest.write_text(html, encoding="utf-8")
    return dest
def save_speedscope(profiler: Profiler, path: str | Path) -> Path:
    """Export the session as Speedscope-compatible JSON and return the path."""
    dest = Path(path)
    payload = profiler.output(renderer=SpeedscopeRenderer())
    dest.write_text(payload, encoding="utf-8")
    return dest
def profile_output(profiler: Profiler, fmt: str = "text", **kw: Any) -> str:
    """
    Render profiler output in the requested format.

    fmt: "text" (default), "html", "json", "speedscope".
    Extra keyword arguments are forwarded to the HTML renderer for
    fmt="html" and consumed as text options (show_all/unicode/color)
    for fmt="text"; other formats ignore them.
    """
    if fmt == "html":
        return profiler.output_html(**kw)
    if fmt == "json":
        return profiler.output(renderer=JSONRenderer())
    if fmt == "speedscope":
        return profiler.output(renderer=SpeedscopeRenderer())
    # Text fallback. Profiler.output() only accepts a renderer argument;
    # the text options must go through output_text() — the original call
    # to output(show_all=..., unicode=..., color=...) raised TypeError.
    return profiler.output_text(
        show_all=kw.get("show_all", False),
        unicode=kw.get("unicode", True),
        color=kw.get("color", False),
    )
# ─────────────────────────────────────────────────────────────────────────────
# 2. Async profiler
# ─────────────────────────────────────────────────────────────────────────────
@asynccontextmanager
async def async_profile(
    interval: float = 0.001,
    print_output: bool = True,
    show_all: bool = False,
):
    """Profile async code, including time spent inside awaited coroutines.

    Usage:
        async with async_profile():
            await fetch_all_records()

    async_mode="enabled" makes pyinstrument sample coroutine frames
    correctly. The profiler is stopped on exit even when the block
    raises; the text report is printed when *print_output* is true.
    """
    profiler = Profiler(interval=interval, async_mode="enabled")
    profiler.start()
    try:
        yield profiler
    finally:
        profiler.stop()
        if print_output:
            profiler.print(show_all=show_all, unicode=True, color=True)
async def profile_async_call(
    coro,
    interval: float = 0.001,
) -> tuple[Any, Profiler]:
    """
    Profile a single coroutine.

    Returns (result, profiler).

    Args:
        coro: awaitable to run under the profiler.
        interval: sampling period in seconds.

    Example:
        result, p = await profile_async_call(fetch_users(db, limit=1000))
        save_html(p, "/tmp/fetch_profile.html")
    """
    p = Profiler(interval=interval, async_mode="enabled")
    p.start()
    try:
        result = await coro
    finally:
        # Stop sampling even if the coroutine raises or is cancelled,
        # so the profiler is not left attached after an error.
        p.stop()
    return result, p
# ─────────────────────────────────────────────────────────────────────────────
# 3. Session management (save / compare)
# ─────────────────────────────────────────────────────────────────────────────
def save_session(profiler: Profiler, path: str | Path) -> Path:
    """
    Serialize the profiler's last session to JSON for later comparison
    (e.g. baseline vs. optimized run).

    Raises:
        ValueError: if the profiler has never completed a start()/stop()
            cycle (last_session is None), instead of failing later with
            an obscure renderer error.
    """
    session = profiler.last_session
    if session is None:
        raise ValueError(
            "profiler has no completed session; call start()/stop() first"
        )
    dest = Path(path)
    serialized = json.dumps(
        {
            "start_time": getattr(session, "start_time", 0),
            "duration": getattr(session, "duration", 0),
            # NOTE(review): the value is JSON renderer output, not HTML —
            # the key "html" is kept only for backward compatibility with
            # any existing readers of these files.
            "html": profile_output(profiler, fmt="json"),
        },
        indent=2,
    )
    dest.write_text(serialized, encoding="utf-8")
    return dest
@dataclass
class ProfileSummary:
    """Compact, structured summary of one profiling session."""
    total_time: float  # total profiled duration in seconds (JSON "duration")
    frame_count: int  # number of frames collected from the call tree
    top_frames: list[str]  # "function (seconds)" strings, slowest first
def parse_json_profile(profiler: Profiler) -> ProfileSummary:
    """
    Parse the JSON renderer output into a structured ProfileSummary:
    total duration, frame count, and the ten slowest frames.
    """
    data = json.loads(profiler.output(renderer=JSONRenderer()))
    total = data.get("duration", 0.0)

    # Pre-order walk of the frame tree, capped at depth 20 to bound
    # pathological recursion.
    def walk(frame: dict, depth: int = 0) -> list[tuple[str, float]]:
        if depth > 20:
            return []
        collected = [(f"{frame.get('function', '?')}", frame.get("time", 0.0))]
        for sub in frame.get("children", []):
            collected += walk(sub, depth + 1)
        return collected

    ranked = sorted(
        walk(data.get("root_frame", {})),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ProfileSummary(
        total_time=total,
        frame_count=len(ranked),
        top_frames=[f"{name} ({secs:.3f}s)" for name, secs in ranked[:10]],
    )
# ─────────────────────────────────────────────────────────────────────────────
# 4. FastAPI / ASGI profiling middleware
# ─────────────────────────────────────────────────────────────────────────────
# Reference snippet (stored as a string, never executed here): FastAPI HTTP
# middleware that profiles a request when the "X-Profile: 1" header is set
# and returns the HTML flamegraph in place of the normal response.
FASTAPI_MIDDLEWARE_EXAMPLE = '''
from fastapi import FastAPI, Request, Response
from pyinstrument import Profiler
app = FastAPI()
PROFILE_HEADER = "X-Profile" # add this header to trigger profiling
@app.middleware("http")
async def profiling_middleware(request: Request, call_next):
if request.headers.get(PROFILE_HEADER) == "1":
p = Profiler(interval=0.001, async_mode="enabled")
p.start()
response = await call_next(request)
p.stop()
html = p.output_html()
return Response(html, media_type="text/html")
return await call_next(request)
'''
# ─────────────────────────────────────────────────────────────────────────────
# 5. Performance benchmark helper
# ─────────────────────────────────────────────────────────────────────────────
def benchmark(
    fn: Callable,
    *args: Any,
    iterations: int = 3,
    warmup: int = 1,
    interval: float = 0.001,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Run fn multiple times, profiling the median (middle) run.

    Returns a dict with timing stats ("min_ms", "max_ms", "median_ms",
    "mean_ms"), the Profiler under "profiler", and the value of the last
    call under "last_result".

    Raises:
        ValueError: if iterations < 1 (the original crashed with a
            NameError / empty-sequence error in that case).

    Example:
        result = benchmark(sort_records, data, iterations=5)
        print(result["median_ms"], "ms")
    """
    if iterations < 1:
        raise ValueError("iterations must be >= 1")
    # Warmup runs are neither timed nor profiled.
    for _ in range(warmup):
        fn(*args, **kwargs)
    profiler = Profiler(interval=interval)
    profiled_iteration = iterations // 2  # the middle run gets the profile
    times: list[float] = []
    sampling = False
    try:
        for i in range(iterations):
            t0 = time.perf_counter()
            if i == profiled_iteration:
                profiler.start()
                sampling = True
            result = fn(*args, **kwargs)
            t1 = time.perf_counter()
            if i == profiled_iteration:
                profiler.stop()
                sampling = False
            times.append((t1 - t0) * 1_000)
    finally:
        # Don't leave the profiler running if fn raised mid-sample.
        if sampling:
            profiler.stop()
    times_sorted = sorted(times)
    return {
        "iterations": iterations,
        "min_ms": round(min(times_sorted), 3),
        "max_ms": round(max(times_sorted), 3),
        "median_ms": round(times_sorted[len(times_sorted) // 2], 3),
        "mean_ms": round(sum(times_sorted) / len(times_sorted), 3),
        "profiler": profiler,
        "last_result": result,
    }
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":

    def fibonacci(n: int) -> int:
        """Iterative Fibonacci — cheap CPU work for demos."""
        if n <= 1:
            return n
        prev, curr = 0, 1
        for _ in range(n - 1):
            prev, curr = curr, prev + curr
        return curr

    def sort_strings(n: int = 10_000) -> list[str]:
        """Build n random 8-char lowercase strings and return them sorted."""
        import random
        import string
        words = ["".join(random.choices(string.ascii_lowercase, k=8)) for _ in range(n)]
        return sorted(words)

    print("=== Sync profile ===")
    with profile() as p:
        sort_strings(50_000)

    print("\n=== profile_call ===")
    result, p = profile_call(sort_strings, 30_000)
    print(f" Result length: {len(result)}")
    html = p.output_html()
    Path("/tmp/sort_profile.html").write_text(html)
    print(f" Saved flamegraph: {len(html):,} chars")

    print("\n=== Benchmark ===")
    stats = benchmark(sort_strings, iterations=3)
    print(f" min={stats['min_ms']}ms median={stats['median_ms']}ms max={stats['max_ms']}ms")

    print("\n=== Async profile ===")

    async def async_work():
        # Five concurrent 10 ms sleeps for the async profiler to observe.
        await asyncio.gather(*(asyncio.sleep(0.01) for _ in range(5)))

    async def main():
        async with async_profile(print_output=True) as p:
            await async_work()
        return p

    asyncio.run(main())
For the cProfile stdlib alternative — cProfile is a deterministic profiler that records every function call, giving exact call counts and cumulative times with no sampling error; pyinstrument uses statistical sampling (default 1 ms) which has near-zero overhead on production workloads and produces a call tree that shows time proportionally — cProfile is ideal for exact counts in tests, pyinstrument is better for profiling long-running services where deterministic overhead is unacceptable. For the scalene alternative — scalene profiles CPU time (Python vs. native), memory, and GPU simultaneously and produces per-line attribution; pyinstrument focuses on call-stack time with sub-millisecond sampling, produces beautiful interactive flamegraphs, and has first-class asyncio support — pyinstrument is faster to use for “which function is slow?” questions, scalene when you need memory and CPU breakdown together. The Claude Skills 360 bundle includes pyinstrument skill sets covering profile() sync context manager, profile_call() function wrapper, async_profile() context manager with async_mode, profile_async_call() coroutine profiler, save_html()/save_speedscope() output helpers, parse_json_profile() structured summary, benchmark() multi-iteration timing, FastAPI profiling middleware, and SessionSerializer for profile comparison. Start with the free tier to try call-stack profiling code generation.