PyMC enables expressive Bayesian modeling with automatic gradient-based inference. Install with `pip install pymc` and `import pymc as pm`. Models are declared in a context manager, `with pm.Model() as model:`, with priors such as `mu = pm.Normal("mu", mu=0, sigma=10)` and `sigma = pm.HalfNormal("sigma", sigma=1)`, and a likelihood tied to data via `obs = pm.Normal("obs", mu=mu, sigma=sigma, observed=data)`. Sampling runs inside the model context: `trace = pm.sample(2000, tune=1000, chains=4, target_accept=0.9)`. For posterior analysis, `import arviz as az`; `az.summary(trace, var_names=["mu", "sigma"])` reports mean, sd, and HDI, while `az.plot_trace(trace)` and `az.plot_posterior(trace)` visualize the draws. Posterior predictive samples come from `pm.sample_posterior_predictive(trace)` and prior predictive checks from `pm.sample_prior_predictive(100)`, both inside the model context. Track derived quantities with `pm.Deterministic("mu_diff", mu1 - mu2)`. Linear regression: `alpha = pm.Normal("alpha", 0, 10)`, `beta = pm.Normal("beta", 0, 10, shape=X.shape[1])`, `mu = alpha + pm.math.dot(X, beta)`, `y = pm.Normal("y", mu=mu, sigma=sigma, observed=y_obs)`. Logistic regression wraps the linear predictor in a sigmoid: `p = pm.Deterministic("p", pm.math.sigmoid(alpha + pm.math.dot(X, beta)))` with `y = pm.Bernoulli("y", p=p, observed=y_obs)`. Hierarchical models use named coordinates: `coords = {"school": school_names}`, then `with pm.Model(coords=coords):` and `mu_school = pm.Normal("mu", 0, 1, dims="school")`. For fast approximate inference, ADVI: `approx = pm.fit(n=10000, method="advi")`, then `trace = approx.sample(1000)`. Gaussian processes: `cov_func = pm.gp.cov.ExpQuad(input_dim=1, ls=1)` and `gp = pm.gp.Marginal(cov_func=cov_func)`. Claude Code generates PyMC Bayesian regression pipelines, A/B test models, hierarchical models, and GP regression scripts.
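A minimal end-to-end sketch wiring these calls together (synthetic data; all names here are illustrative):

import numpy as np
import pymc as pm
import arviz as az

data = np.random.normal(1.0, 2.0, size=200)  # synthetic observations

with pm.Model() as model:
    mu = pm.Normal("mu", mu=0, sigma=10)      # weakly informative prior on the mean
    sigma = pm.HalfNormal("sigma", sigma=1)   # positive-only prior on the noise scale
    obs = pm.Normal("obs", mu=mu, sigma=sigma, observed=data)
    trace = pm.sample(2000, tune=1000, chains=4, target_accept=0.9)

print(az.summary(trace, var_names=["mu", "sigma"]))  # mean, sd, HDI, ESS, R-hat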
CLAUDE.md for PyMC
## PyMC Stack
- Version: pymc >= 5.0
- Model: with pm.Model(coords={"group": labels}) as model:
- Priors: pm.Normal/Beta/Exponential/HalfNormal/StudentT/Dirichlet
- Likelihood: pm.Normal/Bernoulli/Poisson/NegativeBinomial(observed=data)
- Sample: pm.sample(draws=2000, tune=1000, chains=4, target_accept=0.9)
- Posterior: az.summary(trace) | az.plot_trace | az.plot_posterior
- Predictive: pm.sample_posterior_predictive(trace) → ppc
- ADVI: pm.fit(n=10000, method="advi").sample(1000) for fast approximation
PyMC Bayesian Modeling Pipeline
# ml/pymc_pipeline.py — probabilistic programming with PyMC
from __future__ import annotations
import warnings
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
import arviz as az
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
# Reproducibility
SEED = 42
np.random.seed(SEED)
# ── 1. Bayesian linear regression ─────────────────────────────────────────────
def bayesian_linear_regression(
X: np.ndarray,
y: np.ndarray,
draws: int = 2000,
tune: int = 1000,
chains: int = 4,
target_accept: float = 0.9,
prior_mu_sigma: float = 10.0,
prior_sigma: float = 1.0,
) -> tuple[pm.Model, az.InferenceData]:
"""
Bayesian linear regression with weakly informative priors.
Returns model and trace (ArviZ InferenceData).
"""
    if X.ndim == 1:
        X = X[:, None]  # promote to (N, 1) so pm.math.dot(X, beta) is well-defined
    n_features = X.shape[1]
with pm.Model() as model:
# Priors
alpha = pm.Normal("alpha", mu=0, sigma=prior_mu_sigma)
beta = pm.Normal("beta", mu=0, sigma=prior_mu_sigma, shape=n_features)
sigma = pm.HalfNormal("sigma", sigma=prior_sigma)
# Likelihood
mu = pm.Deterministic("mu", alpha + pm.math.dot(X, beta))
obs = pm.Normal("obs", mu=mu, sigma=sigma, observed=y)
# Sample
trace = pm.sample(
draws=draws, tune=tune, chains=chains,
target_accept=target_accept, random_seed=SEED,
progressbar=True,
)
return model, trace
# ── 2. Bayesian logistic regression ──────────────────────────────────────────
def bayesian_logistic_regression(
X: np.ndarray,
y: np.ndarray,
draws: int = 2000,
tune: int = 1000,
chains: int = 2,
) -> tuple[pm.Model, az.InferenceData]:
"""Bayesian logistic regression for binary classification."""
n_features = X.shape[1]
with pm.Model() as model:
X_data = pm.Data("X", X)
alpha = pm.Normal("alpha", mu=0, sigma=2)
beta = pm.Normal("beta", mu=0, sigma=2, shape=n_features)
logit_p = alpha + pm.math.dot(X_data, beta)
p = pm.Deterministic("p", pm.math.sigmoid(logit_p))
obs = pm.Bernoulli("obs", p=p, observed=y)
trace = pm.sample(draws, tune=tune, chains=chains,
target_accept=0.9, random_seed=SEED, progressbar=True)
return model, trace
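# Hypothetical helper (not in the original pipeline): score new rows by pushing
# them through the posterior draws of alpha/beta directly. This sidesteps the
# shape coupling between pm.Data("X") and the fixed-length observed y.
def logistic_predict_proba(trace: az.InferenceData, X_new: np.ndarray) -> np.ndarray:
    """Posterior-mean P(y=1) for each row of X_new."""
    post = trace.posterior
    alpha = post["alpha"].values.reshape(-1)                # (chains*draws,)
    beta = post["beta"].values.reshape(-1, X_new.shape[1])  # (chains*draws, n_features)
    logits = alpha[:, None] + beta @ X_new.T                # (chains*draws, n_new)
    return (1.0 / (1.0 + np.exp(-logits))).mean(axis=0)     # average over draws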
# ── 3. A/B test (Bayesian) ────────────────────────────────────────────────────
def bayesian_ab_test(
control_successes: int,
control_trials: int,
treatment_successes: int,
treatment_trials: int,
draws: int = 5000,
) -> dict:
"""
Bayesian A/B test for conversion rates.
Returns posterior samples and probability that treatment > control.
"""
with pm.Model():
# Beta priors (uniform = Beta(1,1))
p_a = pm.Beta("p_control", alpha=1, beta=1)
p_b = pm.Beta("p_treatment", alpha=1, beta=1)
obs_a = pm.Binomial("obs_control", n=control_trials, p=p_a,
observed=control_successes)
obs_b = pm.Binomial("obs_treatment", n=treatment_trials, p=p_b,
observed=treatment_successes)
delta = pm.Deterministic("delta", p_b - p_a)
rel_uplift = pm.Deterministic("rel_uplift", (p_b - p_a) / p_a)
trace = pm.sample(draws=draws, tune=1000, chains=2,
progressbar=False, random_seed=SEED)
p_treatment_wins = float((trace.posterior["delta"] > 0).mean())
    delta_samples = trace.posterior["delta"].values.flatten()
    uplift_samples = trace.posterior["rel_uplift"].values.flatten()
    hdi_lo, hdi_hi = az.hdi(delta_samples, hdi_prob=0.95)  # true HDI, not equal-tailed percentiles
    return {
        "p_treatment_better": round(p_treatment_wins, 4),
        "delta_mean": round(float(delta_samples.mean()), 4),
        "delta_hdi_95": [round(float(hdi_lo), 4), round(float(hdi_hi), 4)],
        "rel_uplift_median": round(float(np.median(uplift_samples)), 4),
    }
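# Sanity-check sketch (not needed by the pipeline): with Beta(1, 1) priors the
# posterior is conjugate, Beta(1 + successes, 1 + failures), so the same answer
# comes from direct Monte Carlo draws with no MCMC at all.
def ab_test_conjugate(cs: int, ct: int, ts: int, tt: int, n: int = 100_000) -> float:
    """P(treatment > control) under independent Beta(1, 1) priors."""
    rng = np.random.default_rng(SEED)
    p_a = rng.beta(1 + cs, 1 + ct - cs, size=n)  # control posterior draws
    p_b = rng.beta(1 + ts, 1 + tt - ts, size=n)  # treatment posterior draws
    return float((p_b > p_a).mean())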
# ── 4. Hierarchical model ─────────────────────────────────────────────────────
def hierarchical_regression(
X: np.ndarray,
y: np.ndarray,
group_idx: np.ndarray, # Integer group index per observation
n_groups: int,
    group_names: list[str] | None = None,
draws: int = 2000,
tune: int = 1000,
) -> tuple[pm.Model, az.InferenceData]:
"""
Hierarchical (partial pooling) linear regression.
Each group has its own intercept, shrunk toward a global mean.
Addresses the regularization vs. per-group estimation tradeoff.
"""
n_features = X.shape[1]
group_names = group_names or [str(i) for i in range(n_groups)]
coords = {"group": group_names, "feature": list(range(n_features))}
with pm.Model(coords=coords) as model:
# Hyperpriors
mu_alpha = pm.Normal("mu_alpha", 0, 5)
sigma_alpha = pm.HalfNormal("sigma_alpha", 1)
# Group-level intercepts (non-centered parameterization)
alpha_offset = pm.Normal("alpha_offset", 0, 1, dims="group")
alpha = pm.Deterministic("alpha", mu_alpha + alpha_offset * sigma_alpha,
dims="group")
# Shared slope
beta = pm.Normal("beta", 0, 2, dims="feature")
sigma = pm.HalfNormal("sigma", 1)
mu = alpha[group_idx] + pm.math.dot(X, beta)
obs = pm.Normal("obs", mu=mu, sigma=sigma, observed=y)
trace = pm.sample(draws, tune=tune, chains=2,
target_accept=0.9, random_seed=SEED, progressbar=True)
return model, trace
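# Usage sketch: group_idx must be 0-based integer codes aligned with group_names.
# pandas.factorize yields both in one call ("df" and "school" are illustrative):
#   codes, names = pd.factorize(df["school"])
#   model, trace = hierarchical_regression(X, y, group_idx=codes,
#                                          n_groups=len(names),
#                                          group_names=list(names))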
# ── 5. Posterior analysis ─────────────────────────────────────────────────────
def summarize_posterior(
trace: az.InferenceData,
    var_names: list[str] | None = None,
hdi_prob: float = 0.94,
) -> pd.DataFrame:
"""
Compute posterior summary statistics.
Returns DataFrame with mean, sd, HDI bounds, ESS, R̂.
"""
summary = az.summary(trace, var_names=var_names, hdi_prob=hdi_prob)
return summary
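# Convergence gate (a sketch; the thresholds are common rules of thumb, not
# PyMC defaults): flag parameters with high R-hat or low effective sample size
# before trusting any posterior summary.
def check_convergence(trace: az.InferenceData, rhat_max: float = 1.01,
                      ess_min: int = 400) -> pd.DataFrame:
    s = az.summary(trace)
    bad = s[(s["r_hat"] > rhat_max) | (s["ess_bulk"] < ess_min)]
    if len(bad):
        print(f"WARNING: {len(bad)} parameter(s) failed convergence checks")
    return bad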
def compute_hdi(
samples: np.ndarray,
hdi_prob: float = 0.94,
) -> tuple[float, float]:
"""Compute highest density interval for a 1D array of posterior samples."""
hdi = az.hdi(samples.flatten(), hdi_prob=hdi_prob)
return float(hdi[0]), float(hdi[1])
def posterior_predictive_check(
    model: pm.Model,
    trace: az.InferenceData,
    n_samples: int = 500,
) -> az.InferenceData:
    """
    Sample from posterior predictive distribution.
    Use to check if model generates data similar to observations.
    """
    # Thin the posterior so only n_samples draws per chain go through the likelihood
    thinned = trace.sel(draw=slice(None, n_samples - 1))
    with model:
        ppc = pm.sample_posterior_predictive(thinned, random_seed=SEED)
    return ppc
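# Visual PPC sketch (file name illustrative): merge predictive draws into the
# trace so ArviZ can overlay simulated datasets on the observed distribution.
def plot_ppc(model: pm.Model, trace: az.InferenceData, path: str = "ppc.png") -> None:
    with model:
        pm.sample_posterior_predictive(trace, extend_inferencedata=True,
                                       random_seed=SEED)
    az.plot_ppc(trace, num_pp_samples=100)
    plt.savefig(path, dpi=150)
    plt.close()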
# ── 6. Gaussian Process regression ───────────────────────────────────────────
def gaussian_process_regression(
X_train: np.ndarray, # (N, 1) or (N, D)
y_train: np.ndarray,
X_test: np.ndarray,
noise_sigma: float = 0.1,
draws: int = 1000,
tune: int = 500,
) -> dict:
"""
GP regression with squared exponential kernel.
Returns posterior mean and std at test points.
"""
    if X_train.ndim == 1:
        X_train, X_test = X_train[:, None], X_test[:, None]  # GP API expects 2-D inputs
    input_dim = X_train.shape[1]
with pm.Model() as gp_model:
# Kernel hyperpriors
ls = pm.Gamma("ls", alpha=2, beta=1) # Length scale
eta = pm.HalfNormal("eta", sigma=1) # Amplitude
noise = pm.HalfNormal("noise", sigma=noise_sigma)
cov_func = eta**2 * pm.gp.cov.ExpQuad(input_dim=input_dim, ls=ls)
gp = pm.gp.Marginal(cov_func=cov_func)
        y_ = gp.marginal_likelihood("y", X=X_train, y=y_train, sigma=noise)  # PyMC 5 uses sigma= (noise= is the deprecated pre-5 name)
mp = pm.find_MAP()
with gp_model:
mean_pred, var_pred = gp.predict(X_test, point=mp, diag=True, pred_noise=False)
return {
"mean": mean_pred,
"std": np.sqrt(var_pred),
"lower": mean_pred - 2 * np.sqrt(var_pred),
"upper": mean_pred + 2 * np.sqrt(var_pred),
}
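# Plotting sketch for the GP fit (file name illustrative): posterior mean with
# the ~95% band built from the dict returned above.
def plot_gp_fit(X_train: np.ndarray, y_train: np.ndarray, X_test: np.ndarray,
                pred: dict, path: str = "gp_fit.png") -> None:
    plt.figure(figsize=(8, 4))
    plt.scatter(X_train.ravel(), y_train, s=12, label="train")
    plt.plot(X_test.ravel(), pred["mean"], label="GP mean")
    plt.fill_between(X_test.ravel(), pred["lower"], pred["upper"],
                     alpha=0.3, label="mean ± 2 std")
    plt.legend()
    plt.savefig(path, dpi=150)
    plt.close()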
# ── 7. ADVI for fast approximation ───────────────────────────────────────────
def variational_inference(
model: pm.Model,
n_iter: int = 10_000,
method: str = "advi",
n_samples: int = 1000,
) -> az.InferenceData:
"""
Mean-field variational inference as a fast alternative to MCMC.
ADVI: Automatic Differentiation Variational Inference.
Good for quick exploration; NUTS is more accurate for final results.
"""
with model:
approx = pm.fit(n=n_iter, method=method, progressbar=True)
trace = approx.sample(n_samples, random_seed=SEED)
return trace
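# Usage sketch: the constructors above sample inside the model context, so for
# VI you would build the model separately first (names illustrative):
#   with pm.Model() as m:
#       mu = pm.Normal("mu", 0, 10)
#       pm.Normal("obs", mu=mu, sigma=1, observed=data)
#   vi_trace = variational_inference(m, n_iter=20_000)
#   print(az.summary(vi_trace, var_names=["mu"]))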
# ── Demo ──────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
print("PyMC Bayesian Demo")
print("="*50)
# Bayesian A/B test
print("\nBayesian A/B Test:")
result = bayesian_ab_test(
control_successes=120, control_trials=1000,
treatment_successes=150, treatment_trials=1000,
draws=3000,
)
print(f" P(treatment > control) = {result['p_treatment_better']:.3f}")
print(f" Δ = {result['delta_mean']:.4f} 95% HDI: {result['delta_hdi_95']}")
print(f" Relative uplift: {result['rel_uplift_median']*100:.1f}%")
# Bayesian linear regression
print("\nBayesian Linear Regression:")
X = np.random.normal(0, 1, (100, 2))
y = 2 + 3 * X[:, 0] - 1.5 * X[:, 1] + np.random.normal(0, 0.5, 100)
_, trace = bayesian_linear_regression(
X, y, draws=500, tune=500, chains=2
)
summary = summarize_posterior(trace, var_names=["alpha", "beta", "sigma"])
print(summary[["mean", "sd", "hdi_3%", "hdi_97%"]].head(6))
Consider the scikit-learn BayesianRidge alternative when you need quick Bayesian regression with a closed-form evidence approximation: BayesianRidge fits almost instantly. PyMC's NUTS sampler earns its runtime by handling any likelihood (Student-t for outliers, Poisson for counts, Beta for proportions), multi-level hierarchical structures with partial pooling, and the posterior predictive check pattern for model validation that point estimates cannot provide, making PyMC essential for rigorous uncertainty quantification and scientific modeling.

Consider the TensorFlow Probability alternative when building deep probabilistic models with variational layers: TFP integrates probability with deep learning. PyMC's model context manager syntax, by contrast, closely mirrors statistical notation; ArviZ integration provides immediate access to convergence diagnostics (R̂, ESS); and models like hierarchical Gaussian processes or mixture models that would require hundreds of lines in TFP fit in about 20 lines of PyMC, making it the preferred framework for statisticians and researchers doing Bayesian data analysis.

The Claude Skills 360 bundle includes PyMC skill sets covering Bayesian linear regression, logistic regression, A/B testing, hierarchical models with partial pooling, posterior analysis with ArviZ, posterior predictive checks, Gaussian process regression, and ADVI variational inference. Start with the free tier to try probabilistic programming code generation.
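For contrast, a sketch of the scikit-learn route for the plain Gaussian-likelihood case (synthetic data; variable names illustrative):

from sklearn.linear_model import BayesianRidge
import numpy as np

X = np.random.normal(size=(100, 2))
y = 2 + 3 * X[:, 0] - 1.5 * X[:, 1] + np.random.normal(0, 0.5, 100)

# Closed-form evidence approximation: fits in milliseconds, but the likelihood
# is fixed to Gaussian and there is no full posterior for predictive checks.
ridge = BayesianRidge().fit(X, y)
mean, std = ridge.predict(X, return_std=True)  # predictive mean and per-point std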