Python’s pstats module loads and analyzes profiling data produced by cProfile or profile. Import with: import pstats. Stats: s = pstats.Stats("output.prof") — load from file; or pstats.Stats(cprofile_obj) from a running profile. sort_stats: s.sort_stats("cumulative") — sort by cumulative time; "tottime" (self time), "ncalls", "pcalls" (primitive calls), "filename", "name". pstats.SortKey.CUMULATIVE enum (3.7+). print_stats: s.print_stats(10) — top 10 lines; s.print_stats("mymodule") — filter by regex; s.print_stats(0.1) — top 10% of entries. print_callers: s.print_callers(10) — show which functions called each entry. print_callees: s.print_callees(10) — show what each function called. strip_dirs: s.strip_dirs() — remove path prefixes from filenames. add: s.add("second_run.prof") — merge multiple profiles. dump_stats: s.dump_stats("output.prof") — save back to file. get_stats_profile: sp = s.get_stats_profile() (3.9+) → StatsProfile(total_tt, func_profiles). Each function entry: (ncalls, tottime, percall_tottime, cumtime, percall_cumtime, file:line(name)). Capture output: stream = io.StringIO(); pstats.Stats(prof, stream=stream) — reports are written to the stream instead of stdout. Claude Code generates profiling harnesses, flamegraph preparers, hotspot reporters, and CI regression detectors.
CLAUDE.md for pstats
## pstats Stack
- Stdlib: import cProfile, pstats, io
- Run: prof = cProfile.Profile(); prof.enable(); ...; prof.disable()
- Save: prof.dump_stats("out.prof")
- Load: s = pstats.Stats("out.prof", stream=io.StringIO())
- Sort: s.sort_stats("cumulative").print_stats(20)
- Merge: s.add("run2.prof")
- Capture: buf=io.StringIO(); pstats.Stats(prof, stream=buf).sort_stats("cumtime").print_stats(10)
pstats Profile Analysis Pipeline
# app/pstatsutil.py — run, load, sort, report, compare, top functions
from __future__ import annotations
import cProfile
import io
import os
import pstats
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Generator
# ─────────────────────────────────────────────────────────────────────────────
# 1. Profile collection helpers
# ─────────────────────────────────────────────────────────────────────────────
@contextmanager
def profile(
output: str | Path | None = None,
clock: str = "process_time",
) -> Generator[cProfile.Profile, None, None]:
"""
Context manager that profiles the enclosed block and yields the Profile.
Optionally saves to output path.
Example:
with profile("run.prof") as prof:
my_expensive_function()
report(prof, top=10)
"""
p = cProfile.Profile()
p.enable()
try:
yield p
finally:
p.disable()
if output:
p.dump_stats(str(output))
def profile_function(
fn: Callable,
*args: Any,
output: str | Path | None = None,
**kwargs: Any,
) -> tuple[Any, cProfile.Profile]:
"""
Profile a single function call and return (result, profile).
Example:
result, prof = profile_function(sorted, large_list, reverse=True)
report(prof, top=5)
"""
p = cProfile.Profile()
p.enable()
result = fn(*args, **kwargs)
p.disable()
if output:
p.dump_stats(str(output))
return result, p
# ─────────────────────────────────────────────────────────────────────────────
# 2. Stats loading and sorting
# ─────────────────────────────────────────────────────────────────────────────
def load_stats(
source: str | Path | cProfile.Profile,
strip_dirs: bool = True,
) -> pstats.Stats:
"""
Load profiling stats from a .prof file or a cProfile.Profile object.
Example:
s = load_stats("run.prof")
s.sort_stats("cumulative").print_stats(10)
"""
buf = io.StringIO()
s = pstats.Stats(str(source) if isinstance(source, Path) else source, stream=buf)
if strip_dirs:
s.strip_dirs()
return s
def merge_stats(
    sources: list[str | Path | cProfile.Profile],
    strip_dirs: bool = True,
) -> pstats.Stats:
    """
    Merge multiple profile runs into a single Stats object.

    Each source is loaded (and optionally dir-stripped) individually before
    merging, so entry keys line up regardless of where the runs were made.

    Raises:
        ValueError: If ``sources`` is empty.

    Example:
        combined = merge_stats(["run1.prof", "run2.prof", "run3.prof"])
        report_stats(combined)
    """
    if not sources:
        raise ValueError("sources must be non-empty")

    def _as_stats(src) -> pstats.Stats:
        # Normalize Path -> str; swallow any printed output in a throwaway buffer.
        loaded = pstats.Stats(str(src) if isinstance(src, Path) else src,
                              stream=io.StringIO())
        if strip_dirs:
            loaded.strip_dirs()
        return loaded

    first, *rest = sources
    merged = _as_stats(first)
    for src in rest:
        merged.add(_as_stats(src))
    return merged
# ─────────────────────────────────────────────────────────────────────────────
# 3. Report generation
# ─────────────────────────────────────────────────────────────────────────────
def report(
source: str | Path | cProfile.Profile | pstats.Stats,
top: int = 20,
sort: str = "cumulative",
filter_regex: str | None = None,
callers: bool = False,
callees: bool = False,
) -> str:
"""
Generate a text profiling report and return it as a string.
sort: "cumulative", "tottime", "ncalls", "pcalls", "filename".
filter_regex: limit to matching function paths.
Example:
with profile() as prof:
run_app()
print(report(prof, top=15, sort="cumulative"))
"""
buf = io.StringIO()
if isinstance(source, pstats.Stats):
s = source
s.stream = buf
else:
s = pstats.Stats(
str(source) if isinstance(source, Path) else source,
stream=buf
)
s.strip_dirs()
s.sort_stats(sort)
if filter_regex:
s.print_stats(top, filter_regex)
else:
s.print_stats(top)
if callers:
s.print_callers(top)
if callees:
s.print_callees(top)
return buf.getvalue()
# ─────────────────────────────────────────────────────────────────────────────
# 4. Structured hotspot extraction
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class FunctionProfile:
name: str
file: str
line: int
ncalls: int
tottime: float # self time (no sub-calls)
cumtime: float # cumulative time (incl. sub-calls)
tottime_per: float # tottime per call
cumtime_per: float # cumtime per call
def __str__(self) -> str:
return (
f"{self.cumtime:8.4f}s cum "
f"{self.tottime:8.4f}s tot "
f"{self.ncalls:6d} calls "
f"{self.name} ({self.file}:{self.line})"
)
def top_functions(
source: str | Path | cProfile.Profile,
top: int = 20,
sort: str = "cumulative",
min_tottime: float = 0.0,
) -> list[FunctionProfile]:
"""
Return the top hotspot functions as a structured list.
Example:
with profile() as prof:
run_app()
for fn in top_functions(prof, top=10):
print(fn)
"""
buf = io.StringIO()
s = pstats.Stats(
str(source) if isinstance(source, Path) else source,
stream=buf
)
s.strip_dirs().sort_stats(sort)
results: list[FunctionProfile] = []
# pstats stores data as {(file, line, name): (cc, nc, tt, ct, callers)}
for (file, line, name), (cc, nc, tt, ct, _callers) in s.stats.items():
if tt < min_tottime:
continue
tot_per = tt / nc if nc else 0.0
cum_per = ct / nc if nc else 0.0
results.append(FunctionProfile(
name=name, file=file, line=line, ncalls=nc,
tottime=tt, cumtime=ct,
tottime_per=tot_per, cumtime_per=cum_per,
))
keys = {
"cumulative": lambda f: -f.cumtime,
"tottime": lambda f: -f.tottime,
"ncalls": lambda f: -f.ncalls,
}
results.sort(key=keys.get(sort, keys["cumulative"]))
return results[:top]
# ─────────────────────────────────────────────────────────────────────────────
# 5. Regression detection
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ProfileDiff:
name: str
baseline_cum: float
current_cum: float
delta_s: float
pct_change: float
@property
def is_regression(self) -> bool:
return self.pct_change > 0
def __str__(self) -> str:
sign = "+" if self.delta_s >= 0 else ""
return (f" {self.name:40s} "
f"base={self.baseline_cum:.4f}s "
f"now={self.current_cum:.4f}s "
f"{sign}{self.delta_s:.4f}s ({sign}{self.pct_change:.1f}%)")
def compare_profiles(
baseline: str | Path | cProfile.Profile,
current: str | Path | cProfile.Profile,
top: int = 20,
threshold_pct: float = 10.0,
) -> list[ProfileDiff]:
"""
Compare two profiles and flag functions that regressed by more than threshold_pct.
Returns regressions sorted by absolute delta (worst first).
Example:
regressions = compare_profiles("baseline.prof", "current.prof", threshold_pct=5)
for d in regressions:
print(d)
"""
def _load(src) -> dict[str, float]:
s = pstats.Stats(
str(src) if isinstance(src, Path) else src,
stream=io.StringIO()
)
s.strip_dirs()
return {
f"{file}:{line}({name})": ct
for (file, line, name), (cc, nc, tt, ct, _) in s.stats.items()
}
base_map = _load(baseline)
curr_map = _load(current)
diffs: list[ProfileDiff] = []
all_keys = set(base_map) | set(curr_map)
for key in all_keys:
b = base_map.get(key, 0.0)
c = curr_map.get(key, 0.0)
delta = c - b
pct = (delta / b * 100) if b else (100.0 if c else 0.0)
if abs(pct) >= threshold_pct:
diffs.append(ProfileDiff(
name=key, baseline_cum=b, current_cum=c,
delta_s=delta, pct_change=pct,
))
diffs.sort(key=lambda d: -abs(d.delta_s))
return diffs[:top]
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import tempfile
    print("=== pstats demo ===")
    # ── workload to profile ────────────────────────────────────────────────────
    def _sort_heavy() -> list:
        # Reversed input sorted repeatedly, so the sorted() builtin shows up
        # as a clear hotspot in the profile.
        data = list(range(5000, 0, -1))
        for _ in range(50):
            sorted(data)
        return data
    def _sum_heavy() -> int:
        # Pure-Python accumulation loop — dominates self time (tottime).
        total = 0
        for i in range(200_000):
            total += i
        return total
    def _workload() -> None:
        # Combined workload exercising both hotspots above.
        _sort_heavy()
        _sum_heavy()
    # ── profile_function ───────────────────────────────────────────────────────
    print("\n--- profile_function ---")
    with tempfile.TemporaryDirectory() as tmpdir:
        prof_path = Path(tmpdir) / "run.prof"
        # Profile the workload and persist the raw stats to disk.
        result, prof = profile_function(_workload, output=prof_path)
        print(f" profile saved: {prof_path.stat().st_size:,d} bytes")
        # ── report ─────────────────────────────────────────────────────────
        print("\n--- report (top 8 cumulative) ---")
        txt = report(prof, top=8, sort="cumulative")
        # Echo only the first 16 lines to keep demo output short.
        for line in txt.splitlines()[:16]:
            print(f" {line}")
        # ── top_functions ──────────────────────────────────────────────────
        print("\n--- top_functions ---")
        fns = top_functions(prof_path, top=5, sort="tottime")
        for fn in fns:
            print(f" {fn}")
        # ── merge_stats ────────────────────────────────────────────────────
        print("\n--- merge_stats ---")
        prof_path2 = Path(tmpdir) / "run2.prof"
        # Second run of the same workload; merging sums the call counts.
        _, prof2 = profile_function(_workload, output=prof_path2)
        combined = merge_stats([prof_path, prof_path2])
        merged_fns = top_functions(combined, top=3)
        print(f" merged top 3:")
        for fn in merged_fns:
            print(f" {fn}")
        # ── compare_profiles ───────────────────────────────────────────────
        print("\n--- compare_profiles ---")
        # NOTE(review): comparing two runs of identical code measures timing
        # noise only; in CI the baseline would come from a previous build.
        diffs = compare_profiles(prof_path, prof_path2, threshold_pct=5.0)
        print(f" {len(diffs)} functions changed > 5% (noise expected across runs)")
        # ── context manager ────────────────────────────────────────────────
        print("\n--- profile context manager ---")
        with profile(Path(tmpdir) / "ctx.prof") as p:
            _sort_heavy()
        txt2 = report(p, top=3, sort="cumulative")
        print(f" captured {len(txt2.splitlines())} lines of output")
    print("\n=== done ===")
For the cProfile alternative — cProfile is the standard deterministic profiler that produces the data that pstats analyzes; running python -m cProfile -o out.prof script.py or using cProfile.Profile() programmatically and then passing the result to pstats.Stats() is the canonical profiling workflow — cProfile collects the data and pstats analyzes it; use pstats whenever you need to load, sort, filter, or compare .prof files rather than doing interactive analysis. For the pyinstrument / scalene alternative — pyinstrument (PyPI) uses statistical sampling rather than deterministic instrumentation, which adds far less overhead (1-10% vs. up to 100× for cProfile) and produces flame-graph output; scalene (PyPI) profiles CPU, GPU, and memory simultaneously with line-level granularity — use pyinstrument or scalene when profiling production or near-production workloads where deterministic profiling overhead is unacceptable; use cProfile+pstats for detailed call-count and exact timing analysis of unit-test-scale workloads where overhead doesn’t matter, or when you need to compare two .prof files programmatically in CI. The Claude Skills 360 bundle includes pstats skill sets covering profile() context manager, profile_function() single-call profiler, load_stats()/merge_stats() data loaders, report() text report generator, FunctionProfile dataclass with top_functions() structured hotspot extractor, and ProfileDiff with compare_profiles() regression detector. Start with the free tier to try profiling analysis patterns and pstats pipeline code generation.