scalene profiles CPU time (Python vs. native), memory, and GPU — all per line. pip install scalene. CLI: scalene script.py — opens HTML report in browser. No restart: scalene --pid <PID>. JSON: scalene --json --outfile profile.json script.py. Profile one function: from scalene import scalene_profiler; scalene_profiler.start(); fn(); scalene_profiler.stop(). Decorator: from scalene.scalene_profiler import Scalene; @Scalene.profile. Reduced: scalene --reduced-profile script.py — top N lines only. Malloc: scalene --malloc-threshold 100 script.py — only show ≥100 B allocs. Interval: scalene --profile-interval 0.01 script.py — 10ms sampling. All: scalene --profile-all script.py — include library code. CPU-only: scalene --cpu-only script.py. Memory-only: scalene --memory-only script.py. Native: scalene --profile-all script.py includes C extensions. GPU: scalene --gpu script.py (needs NVIDIA). Terminal: scalene --cli script.py — prints the report to the terminal instead of opening a browser (AI-powered optimization suggestions are a feature of the web UI, not --cli). scalene --web script.py — always opens interactive web UI. --outfile results.html for HTML file. JSON schema: {files: {path: {lines: {n: {cpu_percent, memory_mb, ...}}}}}. pytest: pytest --profile (provided by the pytest-profiling plugin, which uses cProfile — not scalene-native). Claude Code generates scalene decorators, JSON parsers, and CI regression scripts.
CLAUDE.md for scalene
## scalene Stack
- Version: scalene >= 1.5 | pip install scalene
- CLI: scalene script.py → HTML browser report
- JSON: scalene --json --outfile out.json script.py → parseable output
- Selective: scalene --reduced-profile (hottest lines only)
- Programmatic: scalene_profiler.start() / .stop() around code block
- GPU: scalene --gpu (NVIDIA cards); CPU-only: scalene --cpu-only
scalene Profiling Pipeline
# app/scalene_utils.py — scalene JSON parser, CI regression check, hot-line reporter
from __future__ import annotations
import json
import subprocess
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# ─────────────────────────────────────────────────────────────────────────────
# 1. Run scalene and capture JSON output
# ─────────────────────────────────────────────────────────────────────────────
def run_profile(
    script: str | list[str],
    output_json: str | Path = "/tmp/scalene_profile.json",
    cpu_only: bool = False,
    memory_only: bool = False,
    reduced: bool = False,
    profile_all: bool = False,
    malloc_threshold: int | None = None,
    profile_interval: float | None = None,
    gpu: bool = False,
    extra_args: list[str] | None = None,
) -> Path:
    """
    Run a Python script under scalene and save JSON output.

    Args:
        script: Path to the script, or a full argv list (script plus its args).
        output_json: Destination for the JSON profile.
        cpu_only / memory_only / reduced / profile_all / gpu: toggle the
            corresponding scalene flags.
        malloc_threshold: Only report allocations of at least this many bytes.
        profile_interval: Sampling interval in seconds.
        extra_args: Extra scalene CLI flags, inserted before the script.

    Returns:
        Path to the JSON file.

    Raises:
        subprocess.CalledProcessError: scalene exited non-zero (check=True).
        FileNotFoundError: the interpreter/scalene module could not be launched.

    Example:
        path = run_profile("benchmarks/matrix_ops.py", cpu_only=True)
        report = load_profile(path)
    """
    out_path = Path(output_json)
    # Robustness: make sure the output directory exists so scalene does not
    # fail when writing --outfile to a fresh location.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # --cli keeps scalene from opening a browser; --json routes the report
    # to --outfile as machine-readable JSON.
    cmd = [sys.executable, "-m", "scalene", "--json",
           "--outfile", str(out_path), "--cli"]
    # Boolean toggles, one statement per line (PEP 8).
    toggles = [
        (cpu_only, "--cpu-only"),
        (memory_only, "--memory-only"),
        (reduced, "--reduced-profile"),
        (profile_all, "--profile-all"),
        (gpu, "--gpu"),
    ]
    for enabled, flag in toggles:
        if enabled:
            cmd.append(flag)
    if malloc_threshold is not None:
        cmd += ["--malloc-threshold", str(malloc_threshold)]
    if profile_interval is not None:
        cmd += ["--profile-interval", str(profile_interval)]
    if extra_args:
        cmd.extend(extra_args)
    # The profiled script (and any of its own argv) goes last.
    if isinstance(script, str):
        cmd.append(script)
    else:
        cmd.extend(script)
    subprocess.run(cmd, check=True)
    return out_path
# ─────────────────────────────────────────────────────────────────────────────
# 2. Parse JSON profile
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class LineProfile:
    """Profile data for a single source line, as parsed from scalene JSON."""
    file: str                       # file path exactly as it appears in the JSON "files" key
    lineno: int                     # 1-based line number within `file`
    cpu_percent: float = 0.0        # fraction of total CPU time (0–1)
    memory_mb: float = 0.0          # peak memory at this line (MB)
    python_fraction: float = 0.0    # fraction of wall time in Python vs. native
    n_malloc_mb: float = 0.0        # memory allocated at this line (MB)
    n_free_mb: float = 0.0          # memory freed at this line (MB)
    source: str = ""                # source text of the line; "" if the file was unreadable
@dataclass
class ProfileReport:
    """Summary of a scalene JSON profile."""
    total_cpu_seconds: float = 0.0      # "elapsed_time_sec" from the JSON root
    total_memory_peak_mb: float = 0.0   # max per-line memory_mb across all lines
    # Top 20 lines by cpu_percent (descending).
    hot_lines: list[LineProfile] = field(default_factory=list)
    # Every parsed line, sorted by cpu_percent (descending).
    all_lines: list[LineProfile] = field(default_factory=list)
def load_profile(json_path: str | Path) -> ProfileReport:
    """
    Parse a scalene JSON profile into a ProfileReport.

    scalene JSON schema (as assumed by this parser):
        {
          "files": {
            "path/to/script.py": {
              "lines": {
                "10": {"cpu_percent": 0.42, "memory_average_mb": 12.5, ...},
                ...
              }
            }
          }
        }

    Returns a ProfileReport whose `all_lines` is sorted by cpu_percent
    (descending) and whose `hot_lines` holds the top 20 of those.
    """
    # Fix: scalene writes UTF-8 — be explicit so the platform locale default
    # (e.g. cp1252 on Windows) cannot corrupt the parse.
    data = json.loads(Path(json_path).read_text(encoding="utf-8"))
    files_data = data.get("files", {})
    all_lines: list[LineProfile] = []
    for fpath, file_info in files_data.items():
        # Best-effort: attach source text when the profiled file is readable.
        source_lines: dict[int, str] = {}
        try:
            src_path = Path(fpath)
            if src_path.exists():
                source_lines = {
                    i + 1: line.rstrip()
                    for i, line in enumerate(
                        src_path.read_text(encoding="utf-8").splitlines()
                    )
                }
        except Exception:
            # Deliberate best-effort: a missing/undecodable source file only
            # costs us the `source` field, never the profile itself.
            pass
        for lineno_str, line_data in file_info.get("lines", {}).items():
            if not isinstance(line_data, dict):
                continue
            lineno = int(lineno_str)  # hoisted: used for both the record and the lookup
            lp = LineProfile(
                file=fpath,
                lineno=lineno,
                cpu_percent=float(line_data.get("cpu_percent", 0)),
                memory_mb=float(line_data.get("memory_average_mb", 0)),
                python_fraction=float(line_data.get("python_fraction", 1.0)),
                n_malloc_mb=float(line_data.get("n_malloc_mb", 0)),
                n_free_mb=float(line_data.get("n_free_mb", 0)),
                source=source_lines.get(lineno, ""),
            )
            all_lines.append(lp)
    all_lines.sort(key=lambda x: x.cpu_percent, reverse=True)
    total_cpu = data.get("elapsed_time_sec", 0.0)
    total_mem = max((lp.memory_mb for lp in all_lines), default=0.0)
    return ProfileReport(
        total_cpu_seconds=total_cpu,
        total_memory_peak_mb=total_mem,
        hot_lines=all_lines[:20],
        all_lines=all_lines,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 3. Hot-line reporter
# ─────────────────────────────────────────────────────────────────────────────
def print_hot_lines(
    report: ProfileReport,
    top_n: int = 10,
    show_memory: bool = True,
) -> None:
    """
    Print the top N CPU-hottest lines in a readable table.

    Args:
        report: Parsed profile (relies on `report.all_lines` being sorted
            hottest-first, as load_profile guarantees).
        top_n: Number of rows to print.
        show_memory: Include the MemMB column.

    Example output:
        Rank  File                     Line   CPU%   MemMB Source
        1     matrix_ops.py              42  38.5%   128.3 result = A @ B
    """
    lines = report.all_lines[:top_n]
    # Fix: only emit the MemMB column (header *and* cells) when requested;
    # previously the header always printed "MemMB" even with show_memory=False,
    # leaving a phantom empty column under it.
    mem_header = f" {'MemMB':>7}" if show_memory else ""
    print(f"\n{'Rank':<5} {'File':<24} {'Line':>5} {'CPU%':>6}{mem_header} Source")
    print("─" * 80)
    for rank, lp in enumerate(lines, 1):
        fname = Path(lp.file).name
        cpu = f"{lp.cpu_percent * 100:.1f}%"
        mem = f" {lp.memory_mb:>7.1f}" if show_memory else ""
        src = lp.source.strip()[:40]  # cap width so the table stays aligned
        print(f"{rank:<5} {fname:<24} {lp.lineno:>5} {cpu:>6}{mem} {src}")
def hot_lines_dict(
    report: ProfileReport,
    top_n: int = 10,
) -> list[dict[str, Any]]:
    """
    Return hot lines as a list of dicts — suitable for JSON export or CI reporting.

    Takes the first `top_n` entries of `report.all_lines` (hottest first)
    and flattens each into a plain dict with 1-based "rank".
    """
    entries: list[dict[str, Any]] = []
    for rank, line in enumerate(report.all_lines[:top_n], start=1):
        entries.append(
            {
                "rank": rank,
                "file": line.file,
                "lineno": line.lineno,
                "cpu_percent": round(line.cpu_percent * 100, 2),
                "memory_mb": round(line.memory_mb, 2),
                "python_fraction": round(line.python_fraction, 3),
                "source": line.source.strip(),
            }
        )
    return entries
# ─────────────────────────────────────────────────────────────────────────────
# 4. CI regression check
# ─────────────────────────────────────────────────────────────────────────────
def compare_profiles(
    baseline_path: str | Path,
    current_path: str | Path,
    cpu_regression_pct: float = 0.20,
    memory_regression_pct: float = 0.20,
) -> dict[str, Any]:
    """
    Compare two scalene JSON profiles and detect regressions.

    Returns a dict with regression flags and delta details.
    cpu_regression_pct: fail if total CPU increases by >20%.
    memory_regression_pct: fail if peak memory increases by >20%.

    Example (CI usage):
        result = compare_profiles("baseline.json", "current.json")
        if result["regression"]:
            sys.exit(1)
    """
    base = load_profile(baseline_path)
    curr = load_profile(current_path)

    def _relative_delta(new: float, old: float) -> float:
        # Guard against a zero/absent baseline metric: report "no change"
        # rather than raising ZeroDivisionError.
        return (new - old) / old if old > 0 else 0.0

    cpu_delta = _relative_delta(curr.total_cpu_seconds, base.total_cpu_seconds)
    mem_delta = _relative_delta(curr.total_memory_peak_mb, base.total_memory_peak_mb)
    cpu_regressed = cpu_delta > cpu_regression_pct
    mem_regressed = mem_delta > memory_regression_pct
    return {
        "regression": cpu_regressed or mem_regressed,
        "cpu_regressed": cpu_regressed,
        "memory_regressed": mem_regressed,
        "cpu_delta_pct": round(cpu_delta * 100, 1),
        "memory_delta_pct": round(mem_delta * 100, 1),
        "baseline_cpu_sec": round(base.total_cpu_seconds, 3),
        "current_cpu_sec": round(curr.total_cpu_seconds, 3),
        "baseline_memory_mb": round(base.total_memory_peak_mb, 2),
        "current_memory_mb": round(curr.total_memory_peak_mb, 2),
    }
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-contained demo: write a tiny benchmark, profile it with scalene,
    # then parse and print the results. Requires scalene to be installed.
    BENCH = "/tmp/_scalene_bench.py"
    # Write a small benchmark script
    Path(BENCH).write_text("""\
import time
def sort_strings(n=10_000):
    import random, string
    data = ["".join(random.choices(string.ascii_lowercase, k=8)) for _ in range(n)]
    return sorted(data)
def matrix_mul(n=200):
    a = [[i * j for j in range(n)] for i in range(n)]
    b = [[i + j for j in range(n)] for i in range(n)]
    return [[sum(a[r][k] * b[k][c] for k in range(n)) for c in range(n)] for r in range(n)]
sort_strings(20_000)
matrix_mul(100)
""")
    print("=== Running scalene profile ===")
    try:
        # CPU-only + reduced keeps the demo run fast and the output small.
        path = run_profile(BENCH, cpu_only=True, reduced=True)
        print(f" JSON saved: {path}")
        report = load_profile(path)
        print(f" Total CPU: {report.total_cpu_seconds:.2f}s")
        print(f" Lines profiled: {len(report.all_lines)}")
        print_hot_lines(report, top_n=5, show_memory=False)
        hot = hot_lines_dict(report, top_n=3)
        print("\n=== Top 3 as dicts ===")
        for entry in hot:
            print(f" {entry}")
    except subprocess.CalledProcessError as e:
        # scalene launched but exited non-zero (e.g. the benchmark crashed).
        print(f"scalene returned non-zero: {e}")
    except FileNotFoundError:
        # Raised when the scalene module/executable cannot be found.
        print("scalene not installed — pip install scalene")
For the cProfile alternative — cProfile records exact call counts and cumulative/per-call time at the function level but cannot distinguish Python-executed time from time spent inside C extensions; scalene separates “Python time” from “native/C time” per line, and adds memory allocation tracking — giving you the full picture of where time is actually spent, including numpy and pandas operations in C. For the pyinstrument alternative — pyinstrument uses statistical sampling to produce a fast call-tree overview with low overhead; scalene augments sampling with memory allocation hooks to produce per-line memory attribution alongside CPU time — use pyinstrument for quick “which function?” questions, scalene when you need per-line memory and want to differentiate Python vs. C extension costs in data-heavy workloads. The Claude Skills 360 bundle includes scalene skill sets covering run_profile() with cpu/memory/gpu/reduced/malloc_threshold options, load_profile() JSON parser, LineProfile and ProfileReport dataclasses, print_hot_lines() table renderer, hot_lines_dict() for CI/JSON export, compare_profiles() regression detector, scalene --cli terminal output, and pytest --profile integration notes. Start with the free tier to try per-line CPU and memory profiling code generation.