Neptune.ai tracks ML experiments with flexible metadata namespaces. pip install neptune. run = neptune.init_run(project="workspace/project", api_token="TOKEN", tags=["gbm", "v2"], description="Churn model v2"). Log single values: run["params/lr"] = 0.05. Log series (epoch metrics): run["train/auc"].append(0.87) — call in a loop. Log artifacts: run["artifacts/model"].upload("model.pkl"). Log images: run["plots/roc_curve"].upload(File.as_image(fig)). Log DataFrames: run["data/summary"].upload(File.as_html(df)). Access: run["params/lr"].fetch(). Stop: run.stop(). Model Registry: model = neptune.init_model(key="CHURN", project="workspace/project"), model["signature"].upload("schema.json"). version = neptune.init_model_version(model=model.sys.id, project="workspace/project"), version["model"].upload("model.pkl"), version.change_stage("production"). Fetch runs: project = neptune.init_project(project="workspace/project"), runs_table = project.fetch_runs_table(tag="production").to_pandas(). Query by column: runs_table.sort_values("train/auc", ascending=False). Custom namespace: run["metadata"]["environment"] = {"python": "3.11", "library": "sklearn-1.4"}. File series: run["metrics/feature_importance"].upload_files(["importance_fold_1.csv", "importance_fold_2.csv"]). run["sys/id"].fetch() gets the run ID for reference. neptune.init_run(with_id="RUN-123") resumes an existing run. Claude Code generates Neptune training loops, model registry workflows, custom namespace structures, and TypeScript API clients.
CLAUDE.md for Neptune.ai
## Neptune Stack
- Version: neptune >= 1.9
- Init: neptune.init_run(project="workspace/project", api_token=TOKEN, tags=[...])
- Log: run["path"] = value (single) or run["path"].append(value, step=n) (series)
- Artifacts: run["artifacts/model"].upload("model.pkl")
- Model: neptune.init_model(key="KEY") → neptune.init_model_version(model=id) → .change_stage()
- Fetch: project.fetch_runs_table(tag, state).to_pandas()
- Stop: run.stop() — or use as context manager (no explicit stop needed)
Training with Neptune
# train_neptune.py — full training script with Neptune.ai tracking
from __future__ import annotations
import os
import pickle
from pathlib import Path
import neptune
import neptune.integrations.sklearn as npt_sklearn
import numpy as np
import pandas as pd
from neptune.types import File
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
average_precision_score,
classification_report,
roc_auc_score,
)
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# Feature columns fed to the model, in the order the pipeline expects them.
FEATURE_COLS = ["age", "tenure_days", "monthly_spend", "support_tickets", "last_login_days"]
# Neptune project slug ("workspace/project"); override via NEPTUNE_PROJECT.
PROJECT = os.environ.get("NEPTUNE_PROJECT", "myworkspace/churn")
# Falls back to the anonymous token so the script can run without credentials.
API_TOKEN = os.environ.get("NEPTUNE_API_TOKEN", neptune.ANONYMOUS_API_TOKEN)
def _build_pipeline(n_estimators: int, learning_rate: float, max_depth: int) -> Pipeline:
    """Return a fresh StandardScaler + GradientBoosting pipeline.

    Centralizes construction so the CV folds and the final fit use
    identical, independently initialized estimators (the original code
    duplicated this literal in two places, inviting drift).
    """
    return Pipeline([
        ("scaler", StandardScaler()),
        ("clf", GradientBoostingClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            random_state=42,
        )),
    ])


def train_and_track(
    data_path: str = "data/train.csv",
    n_estimators: int = 200,
    learning_rate: float = 0.05,
    max_depth: int = 4,
    tags: list[str] | None = None,
) -> tuple[Pipeline, str]:
    """Train a churn GBM and log params, data stats, metrics, and artifacts to Neptune.

    Args:
        data_path: CSV containing FEATURE_COLS plus a binary "churned" column.
        n_estimators: Number of boosting stages.
        learning_rate: Shrinkage applied to each tree's contribution.
        max_depth: Maximum depth of each tree.
        tags: Neptune run tags; defaults to ["sklearn", "gbm"].

    Returns:
        Tuple of (pipeline fitted on all rows, Neptune run ID).
    """
    tags = tags or ["sklearn", "gbm"]
    with neptune.init_run(
        project=PROJECT,
        api_token=API_TOKEN,
        tags=tags,
        description=f"GBM n={n_estimators} lr={learning_rate} depth={max_depth}",
    ) as run:
        run_id = run["sys/id"].fetch()
        print(f"Neptune run: {run_id}")
        # ── Log hyperparameters ───────────────────────────────────────────
        run["params"] = {
            "n_estimators": n_estimators,
            "learning_rate": learning_rate,
            "max_depth": max_depth,
            "random_state": 42,
            "features": FEATURE_COLS,
        }
        # ── Log dataset info ──────────────────────────────────────────────
        df = pd.read_csv(data_path)
        run["data/shape"] = {"rows": len(df), "cols": len(df.columns)}
        run["data/target_rate"] = float(df["churned"].mean())
        run["data/path"] = data_path
        # Upload a sample of the data for quick inspection in the Neptune UI.
        run["data/sample"].upload(File.as_html(df.head(100)))
        X = df[FEATURE_COLS].values
        y = df["churned"].values
        # ── Cross-validation with per-fold logging ────────────────────────
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        fold_scores: list[float] = []
        for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
            X_tr, X_val = X[train_idx], X[val_idx]
            y_tr, y_val = y[train_idx], y[val_idx]
            # Fresh, unfitted pipeline per fold so folds stay independent.
            pipeline = _build_pipeline(n_estimators, learning_rate, max_depth)
            pipeline.fit(X_tr, y_tr)
            auc = roc_auc_score(y_val, pipeline.predict_proba(X_val)[:, 1])
            fold_scores.append(auc)
            run["cv/fold_auc"].append(auc, step=fold)
        mean_auc = float(np.mean(fold_scores))
        run["cv/auc_mean"] = mean_auc
        run["cv/auc_std"] = float(np.std(fold_scores))
        # ── Final model on full data ──────────────────────────────────────
        final_pipeline = _build_pipeline(n_estimators, learning_rate, max_depth)
        final_pipeline.fit(X, y)
        y_proba = final_pipeline.predict_proba(X)[:, 1]
        y_pred = final_pipeline.predict(X)
        # NOTE(review): these are in-sample (training) metrics by design;
        # the CV scores above are the generalization estimate.
        run["metrics/train_auc"] = float(roc_auc_score(y, y_proba))
        run["metrics/train_ap"] = float(average_precision_score(y, y_proba))
        # Classification report uploaded as a text artifact.
        report_str = classification_report(y, y_pred, target_names=["no_churn", "churn"])
        run["metrics/classification_report"].upload(File.from_content(report_str, extension="txt"))
        # Feature importance table, most important first.
        importances = final_pipeline.named_steps["clf"].feature_importances_
        fi_df = pd.DataFrame({"feature": FEATURE_COLS, "importance": importances})
        fi_df = fi_df.sort_values("importance", ascending=False)
        run["metrics/feature_importance"].upload(File.as_html(fi_df))
        # ── Use Neptune sklearn integration ───────────────────────────────
        # Train set is passed as both fit and test data — summary is in-sample.
        estimator_summary = npt_sklearn.create_classifier_summary(
            final_pipeline, X, y, X, y
        )
        run["sklearn/summary"] = estimator_summary
        # ── Save and upload model artifact ────────────────────────────────
        Path("models").mkdir(exist_ok=True)
        model_path = "models/churn_model.pkl"
        with open(model_path, "wb") as f:
            pickle.dump(final_pipeline, f)
        run["artifacts/model"].upload(model_path)
        run["artifacts/model_size_kb"] = Path(model_path).stat().st_size / 1024
        print(f"\nCV AUC: {mean_auc:.4f} ± {np.std(fold_scores):.4f}")
        return final_pipeline, run_id
# ── Model Registry ────────────────────────────────────────────────────────────
def register_model_version(
    model_path: str = "models/churn_model.pkl",
    cv_auc: float = 0.0,
    stage: str = "staging",
) -> str:
    """Register a trained model file as a new version in the Neptune Model Registry.

    Args:
        model_path: Path to the pickled pipeline to upload.
        cv_auc: Cross-validated AUC to record on the version.
        stage: Target lifecycle stage (e.g. "staging", "production").

    Returns:
        The Neptune sys/id of the newly created model version.
    """
    # Create (or reconnect to) the parent model entry.
    with neptune.init_model(
        key="CHURN",
        project=PROJECT,
        api_token=API_TOKEN,
    ) as model:
        model["framework"] = "sklearn"
        model["task"] = "binary_classification"
        model["description"] = "Churn prediction GBM"
        # BUG FIX: use the server-assigned model ID instead of rebuilding it
        # from the project *name* (PROJECT.split('/')[1] gave "churn-CHURN");
        # Neptune IDs derive from the project *key*, which need not match.
        model_id = model["sys/id"].fetch()
    # Create a new version under that model.
    with neptune.init_model_version(
        model=model_id,
        project=PROJECT,
        api_token=API_TOKEN,
    ) as version:
        version["model"].upload(model_path)
        version["metrics/cv_auc"] = cv_auc
        version["metadata/stage"] = stage
        version_id = version["sys/id"].fetch()
        version.change_stage(stage)
        print(f"Registered model version: {version_id} → {stage}")
        return version_id
# ── Fetch and compare runs ────────────────────────────────────────────────────
def compare_runs(top_n: int = 10) -> pd.DataFrame:
    """Return the top-N finished runs in the project, ranked by mean CV AUC."""
    wanted_columns = [
        "sys/id",
        "sys/creation_time",
        "cv/auc_mean",
        "cv/auc_std",
        "params/n_estimators",
        "params/learning_rate",
        "params/max_depth",
    ]
    with neptune.init_project(project=PROJECT, api_token=API_TOKEN) as project:
        # "inactive" = runs that have stopped (finished or aborted).
        table = project.fetch_runs_table(
            columns=wanted_columns,
            state="inactive",
        ).to_pandas()
    leaderboard = table.sort_values("cv/auc_mean", ascending=False)
    return leaderboard.head(top_n)
if __name__ == "__main__":
    pipeline, run_id = train_and_track()
    # Print the leaderboard — the return value was previously discarded,
    # making the comparison call a no-op.
    print(compare_runs())
TypeScript REST Client
// lib/neptune/client.ts — Neptune.ai REST API client
// Base URL of the Neptune.ai REST API.
const NEPTUNE_API = "https://app.neptune.ai/api"
// API token from the environment; empty string means unauthenticated requests.
const API_TOKEN = process.env.NEPTUNE_API_TOKEN ?? ""
// "workspace/project" slug; override via NEPTUNE_PROJECT.
const PROJECT = process.env.NEPTUNE_PROJECT ?? "myworkspace/churn"
/**
 * Issue an authenticated request against the Neptune REST API and parse
 * the JSON response. Throws on any non-2xx status, including the response
 * body text in the error message.
 */
async function neptuneFetch<T>(path: string, options?: RequestInit): Promise<T> {
  // Caller-supplied headers are spread last so they can override defaults.
  const headers = {
    "X-Neptune-Api-Token": API_TOKEN,
    "Content-Type": "application/json",
    ...options?.headers,
  }
  const response = await fetch(`${NEPTUNE_API}${path}`, { ...options, headers })
  if (!response.ok) {
    throw new Error(`Neptune API ${response.status}: ${await response.text()}`)
  }
  return response.json()
}
/** Shape of a run entry returned by the Neptune experiments API. */
export type NeptuneRun = {
  shortId: string                 // e.g. "CHURN-42"
  state: string                   // run lifecycle state as reported by the API
  trashed: boolean                // true if the run was moved to trash
  // Attribute path → typed value, e.g. "cv/auc_mean" → { type, value }.
  attributes: Record<string, { type: string; value: unknown }>
}
/** Fetch a run by short ID (e.g., "CHURN-42") */
export async function getRun(runId: string): Promise<NeptuneRun> {
  const path = `/backend/v1/experiments/${encodeURIComponent(runId)}`
  return neptuneFetch<NeptuneRun>(path)
}
/**
 * Query the runs table for the configured project, sorted by mean CV AUC
 * (descending). Returns up to `limit` entries with the requested attributes.
 */
export async function queryRuns(
  attributePaths: string[] = ["cv/auc_mean", "params/n_estimators"],
  limit: number = 20,
): Promise<NeptuneRun[]> {
  // FIX: removed unused `const [workspace, projectName] = PROJECT.split("/")` —
  // the API takes the full "workspace/project" identifier directly.
  const body = {
    projectIdentifier: PROJECT,
    query: { criteria: [] },
    attributeFilters: attributePaths.map(p => ({ path: p })),
    pagination: { limit, offset: 0 },
    sorting: { dir: "descending", sortBy: { type: "numericAttribute", name: "cv/auc_mean" } },
  }
  const result = await neptuneFetch<{ entries: NeptuneRun[] }>(
    "/backend/v1/experiments/query",
    { method: "POST", body: JSON.stringify(body) }
  )
  return result.entries
}
For the Weights & Biases alternative when needing the Sweeps distributed hyperparameter search, system metrics tracking, richer interactive visualizations, W&B Tables for dataset comparison, and a larger active community — W&B is more feature-complete for experiment visualization, while Neptune excels at artifact versioning with large media files, custom metadata namespaces with hierarchical paths, and a more flexible run comparison query language without per-seat pricing, which fits large teams logging thousands of runs. For the MLflow alternative when needing a free, self-hosted open-source tracking server that runs inside your own infrastructure with a model registry and a REST API compatible with multiple ML frameworks — MLflow is the open-source default, while Neptune provides a fully managed cloud experience with generous storage for large artifacts and rich collaboration features without infrastructure management. The Claude Skills 360 bundle includes Neptune.ai skill sets covering run tracking, model registry versioning, sklearn integration, run comparison queries, and TypeScript API clients. Start with the free tier to try ML experiment tracking generation.