SciPy provides essential scientific computing routines built on NumPy. Install with pip install scipy, then import what you need: from scipy import optimize, signal, stats, sparse, linalg, interpolate, fft, spatial, ndimage. Quick reference (see the sketch below for a runnable taste):
- Optimization: result = optimize.minimize(f, x0, method="L-BFGS-B", bounds=[(0, None)]), then read result.x and result.fun. Curve fitting: popt, pcov = optimize.curve_fit(model_fn, xdata, ydata, p0=initial). Global search: optimize.differential_evolution(f, bounds). Root finding: optimize.brentq(f, a, b). Linear programming: optimize.linprog(c, A_ub=A, b_ub=b, method="highs").
- Statistics: stats.ttest_ind(group_a, group_b, equal_var=False); stats.mannwhitneyu(a, b, alternative="two-sided"). Distributions: stats.norm.ppf(0.975), stats.chi2.sf(stat, df=k-1). Correlation: r, p = stats.pearsonr(x, y); rho, p = stats.spearmanr(x, y). KS test: stats.kstest(data, "norm", args=(mu, sigma)).
- Signal: sos = signal.butter(N=5, Wn=0.1, btype="low", output="sos"); filtered = signal.sosfiltfilt(sos, data). Peaks: peaks, props = signal.find_peaks(data, height=0.5, distance=10). Spectrogram: f, t, Sxx = signal.spectrogram(x, fs=1000).
- FFT: X = fft.rfft(data); freqs = fft.rfftfreq(len(data), d=1/fs).
- Interpolation: cs = interpolate.CubicSpline(x, y); y_interp = cs(x_new).
- Spatial: tree = spatial.KDTree(points); dists, inds = tree.query(query, k=5).
- Sparse: A = sparse.csr_matrix(dense); x = sparse.linalg.spsolve(A, b).
Claude Code generates SciPy optimization solvers, signal filters, hypothesis-test suites, and spatial search pipelines.
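A minimal quickstart sketch combining a few of these calls (the sample data and exponential model below are illustrative, not from a real dataset):

import numpy as np
from scipy import optimize, stats

rng = np.random.default_rng(0)
x = np.linspace(0, 4, 50)
y = 2.0 * np.exp(-1.3 * x) + rng.normal(0, 0.05, 50)
# Nonlinear least squares: recover the decay parameters
popt, pcov = optimize.curve_fit(lambda t, a, b: a * np.exp(-b * t), x, y, p0=[1, 1])
# Welch t-test on two synthetic groups (equal_var=False)
t_stat, p_val = stats.ttest_ind(rng.normal(0, 1, 80), rng.normal(0.3, 1, 80), equal_var=False)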
CLAUDE.md for SciPy
## SciPy Stack
- Version: scipy >= 1.13
- Optimize: optimize.minimize(f, x0, method, bounds) | curve_fit | differential_evolution
- Stats: stats.ttest_ind | mannwhitneyu | pearsonr | kstest | norm/t/chi2 distributions
- Signal: signal.butter + sosfiltfilt | find_peaks | spectrogram | welch
- FFT: fft.rfft(x) | rfftfreq(n, d=1/fs) | ifft for inverse
- Interpolate: CubicSpline(x, y)(x_new) | interp1d | griddata for scattered
- Spatial: KDTree(pts).query(q, k) | distance_matrix | ConvexHull
- Sparse: sparse.csr_matrix | spsolve | eigsh for large sparse eigenvalues
SciPy Scientific Computing Pipeline
# science/scipy_pipeline.py — scientific computing with SciPy
from __future__ import annotations
import numpy as np
import pandas as pd
from typing import Callable
from scipy import optimize, signal, stats, sparse, linalg, interpolate, spatial
from scipy.fft import rfft, irfft, rfftfreq
# ── 1. Optimization ───────────────────────────────────────────────────────────
def minimize_function(
objective: Callable,
x0: np.ndarray,
method: str = "L-BFGS-B", # "L-BFGS-B" | "SLSQP" | "Nelder-Mead" | "trust-ncg"
    bounds: list[tuple] | None = None,
    constraints: list[dict] | None = None,
    options: dict | None = None,
) -> dict:
"""
General-purpose optimization wrapper.
method recommendations:
- L-BFGS-B: fast, handles bounds, gradient-based (default for smooth)
- SLSQP: handles equality and inequality constraints
- Nelder-Mead: gradient-free (useful for noisy or non-differentiable)
- differential_evolution: global optimizer for non-convex problems
"""
result = optimize.minimize(
objective, x0,
method=method,
bounds=bounds,
constraints=constraints or [],
options=options or {"maxiter": 10000, "ftol": 1e-9},
)
return {
"x": result.x,
"fun": result.fun,
"success": result.success,
"message": result.message,
"n_iter": result.get("nit", None),
}
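# Illustrative usage (hypothetical objective, not part of the pipeline):
# an SLSQP solve with one inequality constraint in SciPy's standard dict format.
#
#   res = minimize_function(
#       objective=lambda x: (x[0] - 1) ** 2 + (x[1] - 2.5) ** 2,
#       x0=np.array([2.0, 0.0]),
#       method="SLSQP",
#       constraints=[{"type": "ineq", "fun": lambda x: x[0] - 2 * x[1] + 2}],
#   )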
def global_optimize(
objective: Callable,
bounds: list[tuple], # List of (min, max) per dimension
maxiter: int = 1000,
seed: int = 42,
workers: int = 1,
) -> dict:
"""
Global optimization via Differential Evolution.
Use when the objective has multiple local minima.
"""
result = optimize.differential_evolution(
objective, bounds, maxiter=maxiter, seed=seed,
workers=workers, tol=1e-7, popsize=15,
)
return {"x": result.x, "fun": result.fun, "success": result.success}
def fit_curve(
model_fn: Callable,
xdata: np.ndarray,
ydata: np.ndarray,
    p0: list | None = None,
    bounds: tuple = (-np.inf, np.inf),
    sigma: np.ndarray | None = None,  # Observation uncertainties for weighted fit
) -> tuple[np.ndarray, np.ndarray]:
"""
Nonlinear least-squares curve fitting.
Returns (optimal_params, covariance_matrix).
Confidence intervals: ±1.96 * sqrt(diag(pcov)) for 95% CI.
"""
popt, pcov = optimize.curve_fit(
model_fn, xdata, ydata, p0=p0, bounds=bounds, sigma=sigma,
maxfev=10000, method="trf",
)
    return popt, pcov
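# Illustrative helper (our name, not part of the original API) that turns the
# covariance matrix from fit_curve into the 95% CI half-widths noted in its
# docstring; assumes approximately normal parameter estimates.
def curve_fit_ci95(pcov: np.ndarray) -> np.ndarray:
    """Half-widths of 95% confidence intervals: 1.96 * sqrt(diag(pcov))."""
    return 1.96 * np.sqrt(np.diag(pcov))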
def find_root(
f: Callable,
a: float,
b: float,
method: str = "brentq",
) -> float:
"""Find root of f in interval [a, b]."""
return optimize.brentq(f, a, b) if method == "brentq" else optimize.bisect(f, a, b)
# ── 2. Statistical tests ──────────────────────────────────────────────────────
def compare_two_groups(
group_a: np.ndarray,
group_b: np.ndarray,
alpha: float = 0.05,
paired: bool = False,
) -> dict:
"""
Compare two groups with appropriate tests.
Returns results from both parametric and non-parametric tests.
"""
    # Normality check (Shapiro-Wilk is only reliable for n < 5000; for larger
    # samples we lean on the central limit theorem and skip the test)
    normal_a = stats.shapiro(group_a).pvalue > alpha if len(group_a) < 5000 else True
    normal_b = stats.shapiro(group_b).pvalue > alpha if len(group_b) < 5000 else True
if paired:
t_stat, t_p = stats.ttest_rel(group_a, group_b)
u_stat, u_p = stats.wilcoxon(group_a, group_b)
else:
# Levene's test for equal variance
lev_p = stats.levene(group_a, group_b).pvalue
equal_var = lev_p > alpha
t_stat, t_p = stats.ttest_ind(group_a, group_b, equal_var=equal_var)
u_stat, u_p = stats.mannwhitneyu(group_a, group_b, alternative="two-sided")
effect_size = (np.mean(group_a) - np.mean(group_b)) / np.sqrt(
(np.std(group_a, ddof=1)**2 + np.std(group_b, ddof=1)**2) / 2
)
return {
"both_normal": normal_a and normal_b,
"t_statistic": round(t_stat, 4),
"t_pvalue": round(t_p, 6),
"u_statistic": round(u_stat, 4),
"u_pvalue": round(u_p, 6),
"cohens_d": round(effect_size, 4),
"significant": t_p < alpha,
"n_a": len(group_a), "n_b": len(group_b),
}
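# Illustrative extension for three or more groups (our names, not part of the
# original suite): one-way ANOVA plus its rank-based counterpart, Kruskal-Wallis,
# the distribution-free fallback when normality fails.
def compare_many_groups(*groups: np.ndarray) -> dict:
    f_stat, f_p = stats.f_oneway(*groups)
    h_stat, h_p = stats.kruskal(*groups)
    return {"anova_F": float(f_stat), "anova_p": float(f_p),
            "kruskal_H": float(h_stat), "kruskal_p": float(h_p)}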
def chi_square_test(
observed: np.ndarray, # Contingency table (2D) or frequencies (1D)
expected: np.ndarray = None,
) -> dict:
"""Chi-square goodness-of-fit or independence test."""
if observed.ndim == 1:
stat, p = stats.chisquare(observed, f_exp=expected)
dof = len(observed) - 1
else:
stat, p, dof, _expected = stats.chi2_contingency(observed)
cramers_v = np.sqrt(stat / (observed.sum() * (min(observed.shape) - 1))) if observed.ndim == 2 else None
return {
"chi2_stat": round(stat, 4),
"p_value": round(p, 6),
"dof": dof,
"cramers_v": round(cramers_v, 4) if cramers_v is not None else None,
"significant": p < 0.05,
}
def bootstrap_ci(
data: np.ndarray,
stat_fn: Callable = np.mean,
n_boot: int = 10_000,
ci: float = 0.95,
random_state: int = 42,
) -> tuple[float, float]:
"""Compute bootstrap confidence interval for any statistic."""
rng = np.random.default_rng(random_state)
boots = [stat_fn(rng.choice(data, size=len(data), replace=True)) for _ in range(n_boot)]
alpha = (1 - ci) / 2
return float(np.percentile(boots, alpha * 100)), float(np.percentile(boots, (1-alpha) * 100))
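# Sketch of the same interval via scipy.stats.bootstrap (SciPy >= 1.7), which
# adds bias-corrected-and-accelerated ("BCa") resampling; function name is ours.
def bootstrap_ci_scipy(
    data: np.ndarray,
    stat_fn: Callable = np.mean,
    n_boot: int = 10_000,
    ci: float = 0.95,
    random_state: int = 42,
) -> tuple[float, float]:
    res = stats.bootstrap(
        (data,), stat_fn, n_resamples=n_boot,
        confidence_level=ci, method="BCa", random_state=random_state,
    )
    return float(res.confidence_interval.low), float(res.confidence_interval.high)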
# ── 3. Signal processing ──────────────────────────────────────────────────────
def lowpass_filter(
data: np.ndarray,
cutoff: float, # Normalized frequency (0-1, where 1 = Nyquist)
order: int = 5,
    fs: float | None = None,  # If provided, cutoff is in Hz
) -> np.ndarray:
"""Low-pass Butterworth filter (zero-phase with sosfiltfilt)."""
Wn = cutoff / (fs / 2) if fs is not None else cutoff
sos = signal.butter(order, Wn, btype="low", output="sos")
return signal.sosfiltfilt(sos, data)
def bandpass_filter(
data: np.ndarray,
low_hz: float,
high_hz: float,
fs: float,
order: int = 5,
) -> np.ndarray:
"""Bandpass Butterworth filter."""
sos = signal.butter(order, [low_hz, high_hz], btype="bandpass",
fs=fs, output="sos")
return signal.sosfiltfilt(sos, data)
def detect_peaks(
data: np.ndarray,
    min_height: float | None = None,
    min_dist: int = 10,  # Minimum samples between peaks
    prominence: float | None = None,
) -> tuple[np.ndarray, dict]:
"""Find signal peaks with scipy.signal.find_peaks."""
kwargs = {"distance": min_dist}
if min_height is not None:
kwargs["height"] = min_height
if prominence is not None:
kwargs["prominence"] = prominence
peaks, properties = signal.find_peaks(data, **kwargs)
return peaks, properties
def compute_psd(
data: np.ndarray,
fs: float,
nperseg: int = 256,
) -> tuple[np.ndarray, np.ndarray]:
"""Power spectral density via Welch's method."""
freqs, psd = signal.welch(data, fs=fs, nperseg=nperseg)
return freqs, psd
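# Thin sketch of the spectrogram call from the quick reference; returns
# (frequencies, time bins, spectrogram matrix Sxx).
def compute_spectrogram(
    data: np.ndarray,
    fs: float,
    nperseg: int = 256,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    return signal.spectrogram(data, fs=fs, nperseg=nperseg)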
# ── 4. FFT ────────────────────────────────────────────────────────────────────
def analyze_frequency_content(
signal_data: np.ndarray,
fs: float,
n_top: int = 5,
) -> dict:
"""Analyze dominant frequencies in a signal."""
N = len(signal_data)
X = rfft(signal_data * np.hanning(N))
freqs = rfftfreq(N, d=1/fs)
power = np.abs(X)**2 / N
top_indices = np.argsort(power)[::-1][:n_top]
return {
"freqs": freqs,
"power": power,
"dominant_freqs": freqs[top_indices].tolist(),
"dominant_powers": power[top_indices].tolist(),
"total_power": float(power.sum()),
}
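# Illustrative inverse-FFT companion (uses the irfft imported above): zero out
# spectral bins above cutoff_hz, then reconstruct the time-domain signal.
# Assumes a real-valued input; function name is ours.
def fft_denoise(signal_data: np.ndarray, fs: float, cutoff_hz: float) -> np.ndarray:
    X = rfft(signal_data)
    freqs = rfftfreq(len(signal_data), d=1 / fs)
    X[freqs > cutoff_hz] = 0.0
    return irfft(X, n=len(signal_data))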
# ── 5. Interpolation ──────────────────────────────────────────────────────────
def interpolate_1d(
x: np.ndarray,
y: np.ndarray,
x_new: np.ndarray,
kind: str = "cubic", # "linear" | "cubic" | "nearest" | "quadratic"
) -> np.ndarray:
"""1D interpolation at new x positions."""
    # Note: interp1d is marked legacy in SciPy docs; make_interp_spline is the modern route
    f = interpolate.interp1d(x, y, kind=kind, fill_value="extrapolate")
return f(x_new)
def spline_smooth(
x: np.ndarray,
y: np.ndarray,
x_new: np.ndarray,
    smooth: float | None = None,
) -> np.ndarray:
"""Smooth cubic spline interpolation (handles noisy data)."""
spl = interpolate.UnivariateSpline(x, y, s=smooth)
return spl(x_new)
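# Sketch of scattered-data interpolation via interpolate.griddata, the route
# the stack notes mention for non-gridded samples; function name is ours.
def interpolate_scattered(
    points: np.ndarray,      # (n, 2) sample locations
    values: np.ndarray,      # (n,) sample values
    xi: np.ndarray,          # (m, 2) query locations
    method: str = "cubic",   # "linear" | "cubic" | "nearest"
) -> np.ndarray:
    return interpolate.griddata(points, values, xi, method=method)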
# ── 6. Spatial queries ────────────────────────────────────────────────────────
def build_kdtree(points: np.ndarray) -> spatial.KDTree:
"""Build a KDTree for fast nearest-neighbor queries."""
return spatial.KDTree(points)
def nearest_neighbors(
tree: spatial.KDTree,
queries: np.ndarray,
k: int = 5,
    radius: float | None = None,
) -> tuple:
    """
    Query nearest neighbors.
    Returns (distances, indices) arrays for a k-NN query.
    If radius is set, returns (index_lists, None): query_ball_point yields a
    list of index lists, one per query point, not fixed-shape arrays.
    """
    if radius is not None:
        results = tree.query_ball_point(queries, r=radius)
        return results, None
return tree.query(queries, k=k)
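# Sketches of the other spatial helpers from the stack notes (both are
# standard scipy.spatial APIs; wrapper names are ours).
def pairwise_distances(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    return spatial.distance_matrix(a, b)

def hull_vertices(points: np.ndarray) -> np.ndarray:
    return points[spatial.ConvexHull(points).vertices]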
# ── Demo ──────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
print("SciPy Demo")
print("="*50)
# Optimization: minimize Rosenbrock function
def rosenbrock(x):
return sum(100*(x[1:]-x[:-1]**2)**2 + (1-x[:-1])**2)
result = minimize_function(rosenbrock, x0=np.array([-1.0, 0.0]))
print(f"\nRosenbrock minimum: x={result['x'].round(4)}, f={result['fun']:.6f}")
# Curve fitting: exponential decay
t = np.linspace(0, 5, 100)
y_obs = 3.0 * np.exp(-0.7 * t) + np.random.normal(0, 0.1, 100)
popt, pcov = fit_curve(lambda x, a, b: a * np.exp(-b * x), t, y_obs, p0=[1, 1])
print(f"\nCurve fit: A={popt[0]:.3f}, b={popt[1]:.3f}")
# Statistical test
a = np.random.normal(10, 2, 100)
b = np.random.normal(11, 2, 100)
result = compare_two_groups(a, b)
print(f"\nt-test: t={result['t_statistic']}, p={result['t_pvalue']:.4f}, d={result['cohens_d']}")
# FFT analysis
fs = 1000
    t = np.linspace(0, 1, fs, endpoint=False)  # exactly fs samples over 1 s
sig = np.sin(2*np.pi*50*t) + 0.5*np.sin(2*np.pi*120*t) + np.random.normal(0, 0.1, fs)
freq_info = analyze_frequency_content(sig, fs, n_top=3)
print(f"\nDominant frequencies: {[f'{f:.1f} Hz' for f in freq_info['dominant_freqs']]}")
# KDTree
pts = np.random.rand(1000, 2)
tree = build_kdtree(pts)
dists, inds = nearest_neighbors(tree, np.array([[0.5, 0.5]]), k=3)
print(f"\nNearest 3 neighbors to (0.5, 0.5): distances={dists[0].round(4)}")
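The pipeline above omits the sparse routines listed in the stack notes. A minimal standalone sketch, assuming a large symmetric system built with sparse.random (all calls are standard scipy.sparse APIs):

# science/scipy_sparse_demo.py — illustrative sparse solve and eigenpairs
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve, eigsh

n = 10_000
A = sparse.random(n, n, density=0.001, format="csr", random_state=42)
A = A + A.T + n * sparse.eye(n)           # symmetrize and make well-conditioned
b = np.ones(n)
x = spsolve(A.tocsc(), b)                 # direct solve; CSC is spsolve's preferred format
vals, vecs = eigsh(A, k=5, which="LM")    # five largest-magnitude eigenpairs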
For the NumPy-only alternative, when operations fit within NumPy's vectorized array functions: NumPy is sufficient for basic linear algebra, but SciPy adds rigor that hand-rolled NumPy equivalents lack. optimize.minimize with L-BFGS-B is gradient-based (approximating gradients by finite differences unless you supply jac), signal.sosfiltfilt gives zero-phase filtering, and stats.ttest_ind applies Welch's correction when equal_var=False. The sparse module stores highly sparse matrices (over 99% zeros) in a small fraction of dense-array memory, often 50-100x less.
For the statsmodels alternative for statistical tests: statsmodels provides regression-centric inference, while SciPy's stats module covers the full test suite (Kolmogorov-Smirnov, Anderson-Darling, Mann-Whitney, Kruskal-Wallis) with distribution-free options suited to experimental data that violates normality assumptions. The bootstrap_ci pattern above, or scipy.stats.bootstrap, is the modern nonparametric alternative to asymptotic CIs for any estimable statistic.
The Claude Skills 360 bundle includes SciPy skill sets covering L-BFGS-B and differential-evolution optimization, curve fitting, statistical hypothesis tests, bootstrap CIs, Butterworth signal filters, peak detection, Welch PSD, FFT frequency analysis, cubic spline interpolation, and KDTree spatial queries. Start with the free tier to try scientific computing code generation.