Seaborn builds statistical visualizations on top of Matplotlib. pip install seaborn. import seaborn as sns. Theme: sns.set_theme(style="whitegrid", palette="muted", font_scale=1.2). Scatter: sns.scatterplot(data=df, x="col_a", y="col_b", hue="category", size="value"). Relplot: sns.relplot(data=df, x, y, col="facet", kind="scatter") — figure-level. Line: sns.lineplot(data=df, x, y, hue, estimator="mean", errorbar="ci"). Distribution: sns.histplot(df, x="value", kde=True, bins=50, hue="group", stat="density"). KDE: sns.kdeplot(df, x, y, fill=True, thresh=0.1). Displot: sns.displot(df, x, col="group", kind="kde") — faceted. Box: sns.boxplot(data=df, x="category", y="value", hue="group"). Violin: sns.violinplot(data=df, x, y, split=True, inner="quartile"). Strip: sns.stripplot(data=df, x, y, jitter=True, dodge=True). Heatmap: sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="RdBu_r", vmin=-1, vmax=1, linewidths=0.5). Clustermap: sns.clustermap(df, method="ward", cmap="viridis", z_score=1). Pairplot: sns.pairplot(df, hue="class", diag_kind="kde", plot_kws={"alpha": 0.6}). Jointplot: sns.jointplot(data=df, x, y, kind="reg", marginal_kws={"bins":20}). Regression: sns.regplot(data=df, x, y, lowess=True). Lmplot: sns.lmplot(data=df, x, y, col="group", hue="sex"). FacetGrid: g = sns.FacetGrid(df, col="group", row="year"), g.map_dataframe(sns.scatterplot, x, y). Catplot: sns.catplot(data=df, x, y, col, kind="box", height=4). Objects API: so.Plot(df, x="x", y="y").add(so.Dot()).add(so.Line()).facet(col="group"). Save: plt.savefig("plot.png", dpi=150, bbox_inches="tight"). Claude Code generates Seaborn EDA dashboards, correlation heatmaps, faceted distribution plots, and regression visualization scripts.
CLAUDE.md for Seaborn
## Seaborn Stack
- Version: seaborn >= 0.13
- Theme: sns.set_theme(style, palette, font_scale) — call once at startup
- Axes-level: sns.scatterplot/lineplot/histplot/boxplot/heatmap(ax=ax)
- Figure-level: sns.relplot/displot/catplot/lmplot — return FacetGrid
- Pairwise: sns.pairplot(df, hue) | clustermap(matrix)
- Objects: from seaborn import objects as so — grammar-of-graphics API
- Save: plt.savefig(path, dpi=150, bbox_inches="tight") after any plot
Seaborn Statistical Visualization Pipeline
# viz/seaborn_pipeline.py — statistical data visualization with Seaborn
from __future__ import annotations
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
# Global theme — call once
# Runs at import time, so every plot function in this module shares the
# same style, palette and font scale.
sns.set_theme(style="whitegrid", palette="muted", font_scale=1.1)
# Default ``figsize`` passed to ``plt.subplots`` by the single-axes plots below.
FIGSIZE_DEFAULT = (10, 6)
# ── 0. Helpers ────────────────────────────────────────────────────────────────
def _save(fig_or_ax, path: str, dpi: int = 150) -> str:
    """Save a figure to disk, creating parent dirs if needed.

    The object that is passed in is the one that gets written, instead of
    whatever pyplot currently considers the "current figure".  Dispatch is
    duck-typed so no seaborn/matplotlib class has to be named here:

    * anything exposing ``savefig`` (Matplotlib ``Figure``, seaborn
      ``FacetGrid`` / ``PairGrid`` / ``ClusterGrid``) is saved directly;
    * anything whose ``.figure`` exposes ``savefig`` (e.g. an ``Axes``) is
      saved through that figure;
    * anything exposing ``save`` (the ``Plotter`` returned by
      ``seaborn.objects.Plot.plot()``) is saved through it;
    * everything else falls back to ``plt.savefig`` on the current figure.

    Args:
        fig_or_ax: Figure-like object to write.
        path: Destination file; missing parent directories are created.
        dpi: Resolution forwarded to the underlying save call.

    Returns:
        ``path``, unchanged, so callers can return it directly.
    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    save_kwargs = {"dpi": dpi, "bbox_inches": "tight"}

    parent_figure = getattr(fig_or_ax, "figure", None)
    if callable(getattr(fig_or_ax, "savefig", None)):
        fig_or_ax.savefig(path, **save_kwargs)
    elif callable(getattr(parent_figure, "savefig", None)):
        parent_figure.savefig(path, **save_kwargs)
    elif callable(getattr(fig_or_ax, "save", None)):
        # seaborn.objects Plotter: its figure is not necessarily known to
        # pyplot, so plt.savefig would write an unrelated (blank) figure.
        fig_or_ax.save(path, **save_kwargs)
    else:
        plt.savefig(path, **save_kwargs)

    # Release every pyplot-managed figure so repeated calls do not pile up.
    plt.close("all")
    print(f"Saved: {path}")
    return path
# ── 1. Distribution plots ─────────────────────────────────────────────────────
def distribution_overview(
    df: pd.DataFrame,
    col: str,
    hue: str = None,
    output: str = "dist_overview.png",
    bins: int = 40,
) -> str:
    """Render a three-panel summary of one column and save it.

    Panels, left to right: density histogram with a KDE overlay, box plot
    (one box per ``hue`` level when ``hue`` is given), and the ECDF.

    Args:
        df: Source data.
        col: Column whose distribution is summarised.
        hue: Optional grouping column used to split every panel.
        output: Destination image path.
        bins: Number of histogram bins.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    fig, (hist_ax, box_ax, ecdf_ax) = plt.subplots(1, 3, figsize=(15, 4))

    # Panel 1: histogram + KDE
    sns.histplot(
        data=df, x=col, hue=hue, kde=True, bins=bins,
        stat="density", alpha=0.6, ax=hist_ax,
    )
    hist_ax.set_title(f"Distribution of {col}")

    # Panel 2: box plot, grouped along x only when a hue column is supplied
    box_vars = {"x": hue, "y": col} if hue else {"y": col}
    sns.boxplot(data=df, ax=box_ax, **box_vars)
    box_ax.set_title("Box plot")

    # Panel 3: empirical CDF
    sns.ecdfplot(data=df, x=col, hue=hue, ax=ecdf_ax)
    ecdf_ax.set_title("ECDF")

    fig.suptitle(f"{col} — Distribution Overview", y=1.02)
    plt.tight_layout()
    return _save(fig, output)
def kde_by_group(
    df: pd.DataFrame,
    x: str,
    group: str,
    fill: bool = True,
    output: str = "kde_groups.png",
) -> str:
    """Draw one KDE curve per level of ``group`` on shared axes and save it.

    ``common_norm=False`` is passed so each group's density is normalised
    on its own rather than jointly across all groups.

    Args:
        df: Source data.
        x: Column whose density is estimated.
        group: Column that defines the groups (mapped to colour).
        fill: Whether the area under each curve is shaded.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    kde_options = {
        "fill": fill,
        "alpha": 0.3,
        "common_norm": False,
    }
    fig, axis = plt.subplots(figsize=FIGSIZE_DEFAULT)
    sns.kdeplot(data=df, x=x, hue=group, ax=axis, **kde_options)
    axis.set_title(f"{x} density by {group}")
    plt.tight_layout()
    return _save(fig, output)
def faceted_distributions(
    df: pd.DataFrame,
    x: str,
    col: str,
    row: str = None,
    kind: str = "hist",  # "hist" | "kde" | "ecdf"
    output: str = "facet_dist.png",
) -> str:
    """Faceted distribution across one or two categorical variables.

    Args:
        df: Source data.
        x: Column whose distribution is drawn in every facet.
        col: Column that defines the facet columns.
        row: Optional column that defines the facet rows.
        kind: Passed to ``sns.displot``: ``"hist"``, ``"kde"`` or ``"ecdf"``.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    g = sns.displot(
        data=df, x=x, col=col, row=row, kind=kind,
        facet_kws={"sharey": False}, height=3, aspect=1.2,
    )
    # Give separate row/column templates instead of one positional template:
    # with only a column facet the title is still just "{col_name}", but when
    # ``row`` is set seaborn joins both, so facets in different rows no
    # longer carry identical titles.
    g.set_titles(col_template="{col_name}", row_template="{row_name}")
    plt.tight_layout()
    return _save(g, output)
# ── 2. Relationship plots ─────────────────────────────────────────────────────
def scatter_with_trend(
    df: pd.DataFrame,
    x: str,
    y: str,
    hue: str = None,
    lowess: bool = False,
    output: str = "scatter_trend.png",
) -> str:
    """Scatter plot with regression (or LOWESS) line.

    A single trend line is fitted over all rows.  When ``hue`` is given the
    points are coloured by that column; otherwise ``regplot`` draws them.

    Args:
        df: Source data.
        x: Column for the horizontal axis.
        y: Column for the vertical axis.
        hue: Optional column used to colour the points.
        lowess: Fit a LOWESS smoother instead of a linear regression.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    fig, ax = plt.subplots(figsize=FIGSIZE_DEFAULT)
    # With a hue column the coloured scatterplot below supplies the points,
    # so regplot's own scatter is disabled to avoid drawing every point twice.
    sns.regplot(
        data=df, x=x, y=y, lowess=lowess,
        scatter=not hue, scatter_kws={"alpha": 0.4}, ax=ax,
    )
    if hue:
        sns.scatterplot(data=df, x=x, y=y, hue=hue, alpha=0.6, ax=ax)
    ax.set_title(f"{y} vs {x}" + (" (LOWESS)" if lowess else " (OLS)"))
    plt.tight_layout()
    return _save(fig, output)
def pairplot(
    df: pd.DataFrame,
    cols: list[str] = None,
    hue: str = None,
    diag_kind: str = "kde",
    output: str = "pairplot.png",
) -> str:
    """Pairplot for selected numeric columns.

    Args:
        df: Source data.
        cols: Columns to plot; when omitted, all numeric columns are used.
        hue: Optional column mapped to colour.
        diag_kind: Plot type on the diagonal, forwarded to ``sns.pairplot``.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.

    Raises:
        KeyError: If ``hue`` is given but is not a column of ``df``.
    """
    data = df[cols] if cols else df.select_dtypes(include=np.number)
    if hue:
        if hue not in df.columns:
            raise KeyError(f"hue column {hue!r} not found in DataFrame")
        # Append the hue column only when the selection does not already
        # contain it (numeric hue, or hue listed in ``cols``); otherwise the
        # frame would end up with two identically named columns.
        if hue not in data.columns:
            data = pd.concat([data, df[[hue]]], axis=1)
    g = sns.pairplot(
        data, hue=hue, diag_kind=diag_kind,
        plot_kws={"alpha": 0.5}, corner=True,
    )
    g.figure.suptitle("Pairplot", y=1.01)
    return _save(g, output)
def line_with_ci(
    df: pd.DataFrame,
    x: str,
    y: str,
    hue: str = None,
    errorbar: str = "ci",  # "ci" | "sd" | "se" | None
    output: str = "lineplot.png",
) -> str:
    """Plot ``y`` against ``x`` as a line with an error band and save it.

    Args:
        df: Source data.
        x: Column for the horizontal axis.
        y: Column for the vertical axis.
        hue: Optional column that splits the data into one line per level.
        errorbar: Error-band method forwarded to ``sns.lineplot``.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    fig, axis = plt.subplots(figsize=FIGSIZE_DEFAULT)
    sns.lineplot(
        data=df, x=x, y=y, hue=hue,
        errorbar=errorbar, markers=True, ax=axis,
    )

    title_suffix = f" by {hue}" if hue else ""
    axis.set_title(f"{y} over {x}" + title_suffix)

    # Tilt tick labels when the column name suggests a date/time axis.
    x_lower = x.lower()
    if any(token in x_lower for token in ("date", "time")):
        axis.tick_params(axis="x", rotation=45)

    plt.tight_layout()
    return _save(fig, output)
# ── 3. Categorical plots ──────────────────────────────────────────────────────
def categorical_comparison(
    df: pd.DataFrame,
    x: str,
    y: str,
    hue: str = None,
    kind: str = "violin",  # "violin" | "box" | "bar" | "strip" | "point"
    output: str = "cat_plot.png",
) -> str:
    """Compare a numeric value across categorical groups.

    Categories are ordered by descending frequency when ``x`` has fewer than
    20 distinct values; otherwise seaborn's default ordering is used.

    Args:
        df: Source data.
        x: Categorical column on the horizontal axis.
        y: Numeric column being compared.
        hue: Optional nested grouping column.
        kind: Plot type forwarded to ``sns.catplot``.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    order = None
    if df[x].nunique() < 20:
        order = df[x].value_counts().index.tolist()

    g = sns.catplot(
        data=df, x=x, y=y, hue=hue, kind=kind,
        height=5, aspect=1.4, order=order,
    )
    # The grid has no row/col facets, so FacetGrid.set_titles() has nothing
    # to title and the text would never be drawn; title the single Axes.
    g.ax.set_title(f"{kind} plot")
    g.set_xticklabels(rotation=30, ha="right")
    return _save(g, output)
def count_plot(
    df: pd.DataFrame,
    col: str,
    hue: str = None,
    top_n: int = 20,
    output: str = "countplot.png",
) -> str:
    """Bar chart of category frequencies (top N by count).

    Args:
        df: Source data.
        col: Categorical column to count.
        hue: Optional column that splits each bar.
        top_n: Maximum number of categories to show, most frequent first.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        # A zero or negative slice bound would silently select the wrong rows.
        raise ValueError(f"top_n must be >= 1, got {top_n}")

    order = df[col].value_counts().iloc[:top_n].index.tolist()
    # Size the figure by the bars actually drawn: a column with fewer than
    # ``top_n`` categories should not get an oversized, mostly empty canvas.
    fig, ax = plt.subplots(figsize=(10, max(4, len(order) * 0.35)))
    sns.countplot(data=df, y=col, hue=hue, order=order, ax=ax)
    ax.set_title(f"Count of {col}")
    plt.tight_layout()
    return _save(fig, output)
# ── 4. Heatmaps ───────────────────────────────────────────────────────────────
def correlation_heatmap(
    df: pd.DataFrame,
    cols: list[str] = None,
    method: str = "pearson",
    annot: bool = True,
    cmap: str = "RdBu_r",
    output: str = "correlation.png",
) -> str:
    """
    Draw and save a correlation heatmap with the upper triangle hidden.

    Uses ``cols`` when given, otherwise every numeric column of ``df``.
    Cells strictly above the diagonal are masked; the colour scale is
    fixed to [-1, 1].
    """
    if cols:
        selected = df[cols]
    else:
        selected = df.select_dtypes(include=np.number)
    corr = selected.corr(method=method)

    # k=1 keeps the diagonal visible and masks only the cells above it.
    upper_mask = np.triu(np.ones_like(corr), k=1)

    size = len(corr)
    width = max(6, size * 0.6)
    height = max(5, size * 0.5)
    fig, axis = plt.subplots(figsize=(width, height))

    heatmap_options = {
        "mask": upper_mask,
        "annot": annot,
        "fmt": ".2f",
        "cmap": cmap,
        "vmin": -1,
        "vmax": 1,
        "linewidths": 0.4,
    }
    sns.heatmap(corr, ax=axis, **heatmap_options)
    axis.set_title(f"{method.capitalize()} Correlation Matrix")
    plt.tight_layout()
    return _save(fig, output)
def pivot_heatmap(
    df: pd.DataFrame,
    index: str,
    columns: str,
    values: str,
    aggfunc: str = "mean",
    fmt: str = ".1f",
    output: str = "pivot_heatmap.png",
) -> str:
    """Aggregate ``values`` into an ``index`` × ``columns`` table and plot it.

    Args:
        df: Source data.
        index: Column whose levels become heatmap rows.
        columns: Column whose levels become heatmap columns.
        values: Column being aggregated.
        aggfunc: Aggregation forwarded to ``DataFrame.pivot_table``.
        fmt: Format string for the cell annotations.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    table = df.pivot_table(
        index=index, columns=columns, values=values, aggfunc=aggfunc
    )

    # Grow the canvas with the table, with a floor for small tables.
    n_rows, n_cols = len(table), len(table.columns)
    width = max(6, n_cols * 0.6 + 1)
    height = max(4, n_rows * 0.4 + 1)

    fig, axis = plt.subplots(figsize=(width, height))
    sns.heatmap(
        table, annot=True, fmt=fmt, cmap="YlOrRd", linewidths=0.4, ax=axis
    )
    axis.set_title(f"{values} ({aggfunc}) by {index} × {columns}")
    plt.tight_layout()
    return _save(fig, output)
def clustermap(
    df: pd.DataFrame,
    cols: list[str] = None,
    method: str = "ward",
    z_score: int = 1,  # 0=row, 1=col, None=off
    output: str = "clustermap.png",
) -> str:
    """Draw and save a heatmap with rows and columns clustered hierarchically.

    Args:
        df: Source data.
        cols: Columns to include; when omitted, all numeric columns are used.
        method: Linkage method forwarded to ``sns.clustermap``.
        z_score: Standardisation axis forwarded to ``sns.clustermap``.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    if cols:
        matrix = df[cols]
    else:
        matrix = df.select_dtypes(include=np.number)

    # Figure grows with the matrix, never smaller than 6 x 6.
    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    fig_size = (
        max(6, n_cols * 0.5 + 2),
        max(6, n_rows * 0.15 + 2),
    )

    grid = sns.clustermap(
        matrix,
        method=method,
        metric="euclidean",
        z_score=z_score,
        cmap="vlag",
        figsize=fig_size,
    )
    grid.ax_heatmap.set_title("Hierarchical Clustermap")
    return _save(grid, output)
# ── 5. FacetGrid ──────────────────────────────────────────────────────────────
def faceted_scatter(
    df: pd.DataFrame,
    x: str,
    y: str,
    col: str,
    hue: str = None,
    row: str = None,
    output: str = "facet_scatter.png",
) -> str:
    """Draw one scatter plot per facet of ``col`` (and ``row``) and save it.

    Args:
        df: Source data.
        x: Column for the horizontal axis of every facet.
        y: Column for the vertical axis of every facet.
        col: Column that defines the facet columns.
        hue: Optional column mapped to colour.
        row: Optional column that defines the facet rows.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    grid_options = {
        "col": col,
        "row": row,
        "hue": hue,
        "height": 3.5,
        "aspect": 1.2,
        "margin_titles": True,
    }
    grid = sns.FacetGrid(df, **grid_options)
    grid.map_dataframe(sns.scatterplot, x=x, y=y, alpha=0.5)
    grid.add_legend()
    grid.set_titles(col_template="{col_name}")
    return _save(grid, output)
# ── 6. Seaborn Objects (v0.13+) ───────────────────────────────────────────────
def objects_scatter_line(
    df: pd.DataFrame,
    x: str,
    y: str,
    color: str = None,
    output: str = "so_plot.png",
) -> str:
    """
    Grammar-of-graphics style layered plot using seaborn.objects.
    Dot layer + smooth trend line (second-order polynomial fit).

    Args:
        df: Source data.
        x: Column for the horizontal axis.
        y: Column for the vertical axis.
        color: Optional column mapped to colour in both layers.
        output: Destination image path.

    Returns:
        Whatever ``_save`` returns for ``output``.
    """
    from seaborn import objects as so

    p = (
        so.Plot(df, x=x, y=y, color=color)
        .add(so.Dot(alpha=0.4))
        .add(so.Line(), so.PolyFit(order=2))
        .theme({"axes.spines.top": False, "axes.spines.right": False})
    )
    # Plot.plot() builds its figure outside pyplot by default, so a helper
    # that saves pyplot's current figure would write a blank image.
    # pyplot=True registers the figure with pyplot and makes it current.
    plotter = p.plot(pyplot=True)
    return _save(plotter, output)
# ── Demo ──────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test demo: builds synthetic data and renders a handful of plots
    # into a temporary directory that is removed when the block exits.
    import tempfile

    print("Seaborn Statistical Visualization Demo")
    print("=" * 50)

    # Sample data (fixed seed so the demo is reproducible)
    np.random.seed(42)
    n = 500
    df = pd.DataFrame({
        "age": np.random.randint(18, 75, n),
        "income": np.random.lognormal(10.5, 0.7, n),
        "score": np.random.normal(65, 15, n).clip(0, 100),
        "region": np.random.choice(["North", "South", "East", "West"], n),
        "product": np.random.choice(["A", "B", "C"], n),
        "churn": np.random.choice(["Yes", "No"], n, p=[0.3, 0.7]),
    })

    with tempfile.TemporaryDirectory() as tmpdir:
        # Distribution overview
        path = distribution_overview(df, "income", hue="churn",
                                     output=f"{tmpdir}/dist.png")
        print(f"Distribution plot: {path}")

        # Correlation heatmap
        path = correlation_heatmap(df, cols=["age", "income", "score"],
                                   output=f"{tmpdir}/corr.png")
        print(f"Correlation heatmap: {path}")

        # Categorical violin plot
        path = categorical_comparison(df, x="region", y="income", hue="churn",
                                      kind="violin",
                                      output=f"{tmpdir}/violin.png")
        print(f"Violin plot: {path}")

        # Pivot heatmap
        path = pivot_heatmap(df, index="region", columns="product",
                             values="score", output=f"{tmpdir}/pivot.png")
        print(f"Pivot heatmap: {path}")

        # Line with CI (time series): 4 regions x 24 months
        ts_df = pd.DataFrame({
            "month": np.tile(range(24), 4),
            "sales": np.random.normal(1000, 150, 96),
            "region": np.repeat(["N", "S", "E", "W"], 24),
        })
        path = line_with_ci(ts_df, x="month", y="sales", hue="region",
                            output=f"{tmpdir}/lineplot.png")
        print(f"Line + CI plot: {path}")

    print("\nAll plots saved successfully")
Consider Matplotlib as the alternative when full control over every rendering detail is required: Matplotlib provides pixel-level customization of every element, while Seaborn’s catplot, displot, and relplot produce publication-quality statistical graphics with sensible default aesthetics in one function call, and the hue semantic automatically assigns distinct colors and a legend without a manual plt.scatter call for each group. Consider Plotly as the alternative when interactive hover and zoom matter: Plotly produces interactive HTML charts, while Seaborn’s clustermap with built-in hierarchical ordering, pairplot with per-diagonal KDE, and regplot with confidence bands in a single call (with residplot for residuals) combine statistical computation with rendering that would require 50+ lines in Plotly’s graph_objects API, making Seaborn the faster path for EDA and statistical reporting. The Claude Skills 360 bundle includes Seaborn skill sets covering histogram and KDE distribution plots, correlation heatmaps with triangle masking, violin and box categorical comparisons, faceted scatter and distribution grids, pairplot and clustermap, lineplot with confidence intervals, pivot heatmaps, and the Objects grammar-of-graphics API. Start with the free tier to try statistical visualization code generation.