NumPy is the foundation of Python numerical computing. pip install numpy. Array: import numpy as np; a = np.array([1,2,3]). zeros: np.zeros((3,4)). ones: np.ones((2,3), dtype=np.float32). arange: np.arange(0, 10, 0.5). linspace: np.linspace(0, 1, 100). eye: np.eye(4) — identity. shape: a.shape → (3,). dtype: a.dtype. ndim: a.ndim. reshape: a.reshape(3,1). flatten: a.flatten(). T: a.T transpose. squeeze: np.squeeze(a). expand_dims: np.expand_dims(a, axis=0). slice: a[1:5:2]. bool mask: a[a>0]. fancy index: a[[0,2,4]]. dot: np.dot(A,B) or A@B. matmul: np.matmul(A,B). sum: a.sum(axis=0). mean: a.mean(). std: np.std(a). var: np.var(a). min/max: a.min(); a.argmax(). cumsum: np.cumsum(a). diff: np.diff(a). where: np.where(a>0, a, 0). clip: np.clip(a, 0, 1). abs: np.abs(a). sqrt: np.sqrt(a). log/exp: np.log(a); np.exp(a). concatenate: np.concatenate([a, b], axis=0). stack: np.stack([a,b], axis=1). split: np.split(a, 3). vstack/hstack: np.vstack([a,b]). unique: np.unique(a). sort: np.sort(a). argsort: np.argsort(a). linalg.inv: np.linalg.inv(M). linalg.eig: np.linalg.eig(M). linalg.norm: np.linalg.norm(v). linalg.solve: np.linalg.solve(A, b). random.default_rng: rng = np.random.default_rng(42); rng.normal(0, 1, (100,)). save: np.save("arr.npy", a). load: np.load("arr.npy"). savez: np.savez("data.npz", x=x, y=y). Claude Code generates NumPy array pipelines, linear algebra routines, vectorized transforms, and numerical analysis.
CLAUDE.md for NumPy
## NumPy Stack
- Version: numpy >= 1.26 | pip install numpy
- Create: np.array([...]) | np.zeros/ones/eye((shape), dtype=...) | np.arange(start, stop, step) | np.linspace(start, stop, num)
- Transform: a.reshape(r,c) | a.T | np.concatenate/stack/split | a[mask]
- Math: a@b | np.dot(a,b) | a.sum/mean/std/min/max(axis=n) | np.where(cond, x, y)
- Linalg: np.linalg.inv/eig/norm/solve | np.linalg.svd
- RNG: rng = np.random.default_rng(seed) | rng.normal/uniform/integers(..., size=shape)
NumPy Numerical Computing Pipeline
# app/numeric.py — NumPy arrays, transforms, linear algebra, stats, random, save/load
from __future__ import annotations
from pathlib import Path
from typing import Any
import numpy as np
# ─────────────────────────────────────────────────────────────────────────────
# 1. Array construction
# ─────────────────────────────────────────────────────────────────────────────
def make_grid(
    rows: int,
    cols: int,
    value: float = 0.0,
    dtype: Any = np.float64,
) -> np.ndarray:
    """
    Build a rows×cols array in which every element equals ``value``.

    Example:
        grid = make_grid(3, 4, value=1.0)   # 3×4 array of 1.0
        mask = make_grid(5, 5, dtype=bool)  # 5×5 bool mask
    """
    shape = (rows, cols)
    return np.full(shape, value, dtype=dtype)
def from_ranges(
    x_start: float, x_end: float, x_steps: int,
    y_start: float, y_end: float, y_steps: int,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Build a 2D coordinate meshgrid from two linearly spaced ranges.

    Example:
        X, Y = from_ranges(-1, 1, 100, -1, 1, 100)
        Z = np.sin(X) * np.cos(Y)
    """
    xs = np.linspace(x_start, x_end, x_steps)
    ys = np.linspace(y_start, y_end, y_steps)
    X, Y = np.meshgrid(xs, ys)
    return X, Y
def one_hot(indices: np.ndarray, num_classes: int) -> np.ndarray:
    """
    Expand integer class labels into a one-hot float32 matrix.

    Example:
        labels = np.array([0, 2, 1, 2])
        oh = one_hot(labels, num_classes=3)
        # shape (4, 3)
    """
    count = len(indices)
    encoded = np.zeros((count, num_classes), dtype=np.float32)
    # One fancy-indexed assignment sets the hot cell of every row at once.
    encoded[np.arange(count), indices] = 1.0
    return encoded
def sliding_window_view(arr: np.ndarray, window: int, step: int = 1) -> np.ndarray:
    """
    Return a read-only view of overlapping windows along axis 0.

    Example:
        arr = np.arange(10, dtype=float)
        windows = sliding_window_view(arr, window=3)
        # shape (8, 3) — 8 windows of size 3
    """
    # Fully qualified call, so shadowing numpy's name of the same function
    # is harmless. Slicing a view keeps the result zero-copy.
    every_window = np.lib.stride_tricks.sliding_window_view(arr, window_shape=window)
    return every_window[::step]
# ─────────────────────────────────────────────────────────────────────────────
# 2. Array transforms
# ─────────────────────────────────────────────────────────────────────────────
def normalize_minmax(arr: np.ndarray, axis: int | None = None, eps: float = 1e-8) -> np.ndarray:
"""
Min-max normalize to [0, 1].
Example:
X_norm = normalize_minmax(X, axis=0) # normalize each column
"""
lo = arr.min(axis=axis, keepdims=True)
hi = arr.max(axis=axis, keepdims=True)
return (arr - lo) / (hi - lo + eps)
def normalize_zscore(arr: np.ndarray, axis: int | None = None, eps: float = 1e-8) -> np.ndarray:
"""
Z-score standardize (mean=0, std=1).
Example:
X_std = normalize_zscore(X, axis=0)
"""
mean = arr.mean(axis=axis, keepdims=True)
std = arr.std(axis=axis, keepdims=True)
return (arr - mean) / (std + eps)
def clip_outliers(
    arr: np.ndarray,
    lo_pct: float = 1.0,
    hi_pct: float = 99.0,
) -> np.ndarray:
    """
    Winsorize: clip values below/above the given percentiles.

    Example:
        cleaned = clip_outliers(prices, lo_pct=5, hi_pct=95)
    """
    # A single percentile call computes both bounds in one pass.
    lower, upper = np.percentile(arr, [lo_pct, hi_pct])
    return np.clip(arr, lower, upper)
def batch_iter(arr: np.ndarray, batch_size: int):
    """
    Yield consecutive batches of an array along axis 0 (last one may be short).

    Example:
        for batch in batch_iter(X_train, batch_size=32):
            loss = model.forward(batch)
    """
    total = len(arr)
    start = 0
    while start < total:
        yield arr[start : start + batch_size]
        start += batch_size
def pad_to(arr: np.ndarray, length: int, pad_value: float = 0.0, axis: int = 0) -> np.ndarray:
    """
    Pad (with ``pad_value``) or truncate ``arr`` along ``axis`` to exactly ``length``.

    Example:
        seq = pad_to(embedding_seq, length=512)
    """
    size = arr.shape[axis]
    if size > length:
        # Too long: keep only the first `length` entries along the axis.
        return arr.take(range(length), axis=axis)
    if size < length:
        # Too short: append a constant-filled tail of the same dtype.
        tail_shape = list(arr.shape)
        tail_shape[axis] = length - size
        tail = np.full(tail_shape, pad_value, dtype=arr.dtype)
        return np.concatenate([arr, tail], axis=axis)
    return arr
# ─────────────────────────────────────────────────────────────────────────────
# 3. Linear algebra
# ─────────────────────────────────────────────────────────────────────────────
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float | np.ndarray:
"""
Cosine similarity between two vectors or between rows of two matrices.
Example:
score = cosine_similarity(vec_a, vec_b) # scalar
scores = cosine_similarity(query_mat, doc_mat) # matrix
"""
a_norm = a / (np.linalg.norm(a, axis=-1, keepdims=True) + 1e-8)
b_norm = b / (np.linalg.norm(b, axis=-1, keepdims=True) + 1e-8)
return a_norm @ b_norm.T if a.ndim > 1 else float(a_norm @ b_norm)
def pairwise_distances(
    X: np.ndarray,
    metric: str = "euclidean",
) -> np.ndarray:
    """
    Compute the full n×n pairwise distance matrix for the rows of X.

    Supported metrics: "euclidean" (L2) and "cosine" (1 - cosine similarity).

    Example:
        D = pairwise_distances(embeddings, metric="cosine")
    """
    if metric == "euclidean":
        # Broadcast to (n, n, d) row differences, then reduce over features.
        deltas = X[:, None, :] - X[None, :, :]
        return np.sqrt(np.sum(deltas ** 2, axis=-1))
    if metric == "cosine":
        row_norms = np.linalg.norm(X, axis=1, keepdims=True) + 1e-8
        unit_rows = X / row_norms
        return 1.0 - unit_rows @ unit_rows.T
    raise ValueError(f"Unknown metric: {metric!r}")
def pca(X: np.ndarray, n_components: int = 2) -> tuple[np.ndarray, np.ndarray]:
    """
    Principal component analysis via SVD of the centered data.

    Returns (projected, explained_variance_ratio).

    Example:
        X_2d, var = pca(embeddings, n_components=2)
        plt.scatter(X_2d[:, 0], X_2d[:, 1])
    """
    centered = X - X.mean(axis=0)
    # Thin SVD: singular values squared are proportional to component variances.
    _, singular_values, vt = np.linalg.svd(centered, full_matrices=False)
    basis = vt[:n_components]
    coords = centered @ basis.T
    variances = singular_values ** 2
    ratio = variances[:n_components] / variances.sum()
    return coords, ratio
def solve_linear(A: np.ndarray, b: np.ndarray) -> np.ndarray:
    """
    Solve Ax = b in the least-squares sense (handles overdetermined systems).

    Example:
        x = solve_linear(A, b)
    """
    # lstsq returns (solution, residuals, rank, singular_values); keep the first.
    solution = np.linalg.lstsq(A, b, rcond=None)[0]
    return solution
# ─────────────────────────────────────────────────────────────────────────────
# 4. Statistics helpers
# ─────────────────────────────────────────────────────────────────────────────
def describe_array(arr: np.ndarray) -> dict:
    """
    Descriptive statistics for an array (flattened to 1D first).

    Returns a dict with n, mean, std, min, p25/p50/p75/p95 percentiles, max.
    All four percentiles are computed in a single np.percentile call (one
    internal sort/partition) instead of four separate passes.

    Raises ValueError on an empty array (propagated from np.min/np.max).

    Example:
        stats = describe_array(prices)
        print(f"mean={stats['mean']:.2f} p95={stats['p95']:.2f}")
    """
    flat = arr.ravel()
    p25, p50, p75, p95 = np.percentile(flat, [25, 50, 75, 95])
    return {
        "n": int(flat.size),
        "mean": float(np.mean(flat)),
        "std": float(np.std(flat)),
        "min": float(np.min(flat)),
        "p25": float(p25),
        "p50": float(p50),
        "p75": float(p75),
        "p95": float(p95),
        "max": float(np.max(flat)),
    }
def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
    """
    Simple moving average; output has len(arr) - window + 1 points.

    Example:
        smoothed = moving_average(signal, window=7)
    """
    # Uniform weights summing to 1 make the convolution an average.
    weights = np.full(window, 1.0 / window)
    return np.convolve(arr, weights, mode="valid")
def histogram_bins(
    arr: np.ndarray,
    bins: int = 10,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Histogram counts and bin edges (edges has len(counts) + 1 entries).

    Example:
        counts, edges = histogram_bins(prices, bins=20)
        midpoints = (edges[:-1] + edges[1:]) / 2
    """
    counts, edges = np.histogram(arr, bins=bins)
    return counts, edges
# ─────────────────────────────────────────────────────────────────────────────
# 5. Random number generation (new Generator API)
# ─────────────────────────────────────────────────────────────────────────────
def make_rng(seed: int | None = None) -> np.random.Generator:
"""
Create a reproducible random number generator.
Example:
rng = make_rng(42)
data = rng.normal(0, 1, (100, 10))
labels = rng.integers(0, 5, size=100)
"""
return np.random.default_rng(seed)
def random_split(
    *arrays: np.ndarray,
    test_size: float = 0.2,
    seed: int = 42,
) -> list[np.ndarray]:
    """
    Split arrays into train/test subsets with one shared shuffle.

    All arrays must have the same length along axis 0; mismatched inputs
    now raise ValueError up front (previously they either failed with an
    opaque IndexError or were silently truncated).

    Returns [a0_train, a0_test, a1_train, a1_test, ...].

    Example:
        X_train, X_test, y_train, y_test = random_split(X, y, test_size=0.2)
    """
    if not arrays:
        raise ValueError("random_split requires at least one array")
    n = len(arrays[0])
    if any(len(arr) != n for arr in arrays[1:]):
        raise ValueError("all arrays must have the same length along axis 0")
    rng = np.random.default_rng(seed)  # same construction as make_rng(seed)
    idx = rng.permutation(n)
    split_at = int(n * (1 - test_size))
    train_idx, test_idx = idx[:split_at], idx[split_at:]
    result: list[np.ndarray] = []
    for arr in arrays:
        result.append(arr[train_idx])
        result.append(arr[test_idx])
    return result
# ─────────────────────────────────────────────────────────────────────────────
# 6. Persistence
# ─────────────────────────────────────────────────────────────────────────────
def save_arrays(path: str | Path, **arrays: np.ndarray) -> Path:
"""
Save named arrays to .npz file.
Example:
save_arrays("data/embeddings.npz", X=X_train, y=y_train)
"""
p = Path(path)
p.parent.mkdir(parents=True, exist_ok=True)
np.savez_compressed(str(p), **arrays)
return p.with_suffix(".npz")
def load_arrays(path: str | Path) -> dict[str, np.ndarray]:
"""
Load arrays from .npz file.
Example:
data = load_arrays("data/embeddings.npz")
X, y = data["X"], data["y"]
"""
npz = np.load(str(path))
return dict(npz)
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test every section of the module end to end.
    rng = make_rng(42)
    print("=== Array construction ===")
    print(f" make_grid(2,3): {make_grid(2,3,1.0)}")
    oh = one_hot(np.array([0,2,1,2]), 3)
    print(f" one_hot shape: {oh.shape}")
    print("\n=== Transforms ===")
    X = rng.normal(5, 2, (100, 4))
    Xn = normalize_zscore(X, axis=0)
    print(f" zscore mean: {Xn.mean(axis=0).round(3)} std: {Xn.std(axis=0).round(3)}")
    Xmm = normalize_minmax(X, axis=0)
    print(f" minmax min: {Xmm.min(axis=0).round(3)} max: {Xmm.max(axis=0).round(3)}")
    print("\n=== Linear algebra ===")
    A = rng.normal(0, 1, (50, 4))
    X_2d, evr = pca(A, n_components=2)
    print(f" PCA 2D shape: {X_2d.shape}, EVR: {evr.round(3)}")
    v1 = rng.normal(0, 1, (128,))
    v2 = rng.normal(0, 1, (128,))
    print(f" cosine_sim(v1,v2): {cosine_similarity(v1, v2):.4f}")
    print("\n=== Statistics ===")
    prices = rng.exponential(scale=100, size=1000)
    stats = describe_array(prices)
    print(f" mean={stats['mean']:.1f} std={stats['std']:.1f} p95={stats['p95']:.1f}")
    smoothed = moving_average(prices[:50], window=5)
    print(f" moving_avg window=5 → {len(smoothed)} points")
    print("\n=== Random split ===")
    y = rng.integers(0, 3, size=100)
    X_tr, X_te, y_tr, y_te = random_split(X, y, test_size=0.2)
    print(f" train={len(X_tr)}, test={len(X_te)}")
    print("\n=== Save / load ===")
    import tempfile, os
    with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as tf:
        tmp = tf.name
    # tmp is guaranteed to end with ".npz" (suffix= above), so the original
    # `... if not tmp.endswith(".npz") else ...` ternaries were dead code.
    save_arrays(tmp, X=X_tr, y=y_tr)
    loaded = load_arrays(tmp)
    os.unlink(tmp)
    print(f" loaded keys: {list(loaded.keys())}, X shape: {loaded['X'].shape}")
For the PyTorch / JAX alternative — PyTorch and JAX provide GPU-accelerated tensor operations with automatic differentiation; NumPy runs on CPU only (no autograd) but is battle-tested, has zero setup, and is the interchange format for nearly every Python scientific library (scikit-learn, scipy, pandas, matplotlib) — use NumPy for CPU data processing, feature engineering, and linear algebra where GPU acceleration is not needed, PyTorch for deep learning model training, JAX for differentiable programs and research requiring both JIT compilation and autograd. For the CuPy alternative — CuPy implements the NumPy/SciPy API on NVIDIA GPUs; code written with CuPy is nearly identical to NumPy but runs on GPU, delivering 10–100× speedup for large array operations — use NumPy when CPU is sufficient or you need broad ecosystem compatibility, CuPy as a drop-in replacement when the same NumPy operations become the bottleneck on large arrays and a GPU is available. The Claude Skills 360 bundle includes NumPy skill sets covering make_grid()/from_ranges()/one_hot()/sliding_window_view() construction, normalize_minmax()/normalize_zscore()/clip_outliers()/batch_iter()/pad_to() transforms, cosine_similarity()/pairwise_distances()/pca()/solve_linear() linear algebra, describe_array()/moving_average()/histogram_bins() statistics, make_rng()/random_split() reproducible randomness, and save_arrays()/load_arrays() persistence. Start with the free tier to try numerical computing and array manipulation code generation.