TensorFlow is Google’s deep learning framework with the Keras high-level API. pip install tensorflow. Model: from tensorflow import keras; model = keras.Sequential([keras.layers.Dense(64, activation="relu"), keras.layers.Dense(10, activation="softmax")]). compile: model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]). fit: history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2). evaluate: loss, acc = model.evaluate(X_test, y_test). predict: probs = model.predict(X_new). Functional API: inputs = keras.Input(shape=(784,)); x = keras.layers.Dense(64, activation="relu")(inputs); outputs = keras.layers.Dense(10)(x); model = keras.Model(inputs, outputs). Layers: Dense, Conv2D, MaxPool2D, LSTM, GRU, Embedding, BatchNormalization, Dropout, Flatten, GlobalAveragePooling2D. Activation: relu, sigmoid, tanh, softmax, swish. Adam: keras.optimizers.Adam(learning_rate=1e-3). Loss: sparse_categorical_crossentropy, binary_crossentropy, mse, mae. Dataset: ds = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE). EarlyStopping: keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True). ModelCheckpoint: keras.callbacks.ModelCheckpoint("best.keras", save_best_only=True). TensorBoard: keras.callbacks.TensorBoard(log_dir="logs/"). save: model.save("model.keras"). load: model = keras.models.load_model("model.keras"). TFLite: converter = tf.lite.TFLiteConverter.from_keras_model(model); tflite_model = converter.convert(). Claude Code generates TensorFlow training pipelines, custom layers, data pipelines, and evaluation scripts.
CLAUDE.md for TensorFlow
## TensorFlow Stack
- Version: tensorflow >= 2.15 | pip install tensorflow
- Model: keras.Sequential([layers...]) | Functional: keras.Model(inputs, outputs)
- Compile: model.compile(optimizer=Adam(learning_rate=1e-3), loss="...", metrics=["accuracy"])
- Train: model.fit(ds_train, epochs=N, validation_data=ds_val, callbacks=[...])
- Data: tf.data.Dataset.from_tensor_slices(...).shuffle(N).batch(B).prefetch(AUTO)
- Save: model.save("model.keras") | keras.models.load_model("model.keras")
TensorFlow Training Pipeline
# app/tf_pipeline.py — TF/Keras build, compile, fit, evaluate, save, dataset, callbacks
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable
import numpy as np
import tensorflow as tf
from tensorflow import keras
log = logging.getLogger(__name__)
# Reduce TensorFlow log noise in demos
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")
# ─────────────────────────────────────────────────────────────────────────────
# 1. Dataset helpers
# ─────────────────────────────────────────────────────────────────────────────
def numpy_dataset(
    X: np.ndarray,
    y: np.ndarray | None = None,
    batch_size: int = 32,
    shuffle: bool = True,
    shuffle_buffer: int = 1000,
    prefetch: bool = True,
) -> tf.data.Dataset:
    """
    Wrap NumPy arrays in a tf.data pipeline: (shuffle) -> batch -> (prefetch).

    Args:
        X: Feature array.
        y: Optional label array; when given, the dataset yields (X, y) pairs.
        batch_size: Batch size for `Dataset.batch`.
        shuffle: Whether to shuffle with a fixed seed (reproducible epochs).
        shuffle_buffer: Buffer size for `Dataset.shuffle`.
        prefetch: Whether to append `prefetch(AUTOTUNE)` to overlap input
            preparation with model compute.

    Example:
        ds = numpy_dataset(X_train, y_train, batch_size=64, shuffle=True)
        model.fit(ds, epochs=10)
    """
    tensors = (X, y) if y is not None else X
    ds = tf.data.Dataset.from_tensor_slices(tensors)
    if shuffle:
        # Fixed seed so reruns see the same epoch ordering.
        ds = ds.shuffle(buffer_size=shuffle_buffer, seed=42)
    ds = ds.batch(batch_size)
    return ds.prefetch(tf.data.AUTOTUNE) if prefetch else ds
def make_augmentation_layer(
    flip: bool = True,
    rotation: float = 0.1,
    zoom: float = 0.1,
) -> keras.Sequential:
    """
    Assemble a Sequential of random image-augmentation layers.

    Each transform is included only when its flag/factor is truthy, so
    `rotation=0.0` (or `zoom=0.0`) drops that layer entirely.

    Example:
        aug = make_augmentation_layer(flip=True, rotation=0.15)
        model = keras.Sequential([aug, backbone])
    """
    # (enabled, factory) pairs — factories defer layer construction until needed.
    candidates = [
        (flip, lambda: keras.layers.RandomFlip("horizontal")),
        (rotation, lambda: keras.layers.RandomRotation(rotation)),
        (zoom, lambda: keras.layers.RandomZoom(zoom)),
    ]
    ops = [make() for enabled, make in candidates if enabled]
    return keras.Sequential(ops, name="augmentation")
# ─────────────────────────────────────────────────────────────────────────────
# 2. Model builders
# ─────────────────────────────────────────────────────────────────────────────
def mlp(
    input_dim: int,
    output_dim: int,
    hidden_dims: tuple[int, ...] | list[int] = (256, 128, 64),
    activation: str = "relu",
    dropout_rate: float = 0.3,
    l2_reg: float = 1e-4,
    output_activation: str | None = None,
) -> keras.Model:
    """
    Multi-layer perceptron with BatchNorm, dropout and L2 regularization.

    Each hidden block is Dense -> BatchNormalization -> Dropout (dropout only
    when `dropout_rate > 0`).

    Args:
        input_dim: Number of input features.
        output_dim: Number of output units.
        hidden_dims: Widths of the hidden Dense layers. (Annotation fixed:
            the default is deliberately a tuple — a list default would be a
            shared mutable — so the type now admits both.)
        activation: Activation for hidden layers.
        dropout_rate: Dropout probability; 0 disables dropout layers.
        l2_reg: L2 kernel-regularization factor; 0/falsy disables it.
        output_activation: Activation for the output layer (None = linear
            logits, suitable for `from_logits=True` losses).

    Returns:
        An uncompiled functional `keras.Model` named "mlp".

    Example:
        model = mlp(input_dim=784, output_dim=10, hidden_dims=[256, 128])
        model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    """
    reg = keras.regularizers.l2(l2_reg) if l2_reg else None
    inputs = keras.Input(shape=(input_dim,), name="inputs")
    x = inputs
    for i, units in enumerate(hidden_dims):
        x = keras.layers.Dense(units, activation=activation,
                               kernel_regularizer=reg, name=f"dense_{i}")(x)
        x = keras.layers.BatchNormalization(name=f"bn_{i}")(x)
        if dropout_rate > 0:
            x = keras.layers.Dropout(dropout_rate, name=f"dropout_{i}")(x)
    outputs = keras.layers.Dense(output_dim, activation=output_activation, name="output")(x)
    return keras.Model(inputs, outputs, name="mlp")
def cnn(
    input_shape: tuple[int, int, int],
    num_classes: int,
    filters: tuple[int, ...] | list[int] = (32, 64, 128),
    dense_units: int = 256,
    dropout_rate: float = 0.5,
) -> keras.Model:
    """
    Simple CNN for image classification.

    Each stage is Conv2D(3x3, same-padding) -> BatchNorm -> MaxPool(2), then a
    GlobalAveragePooling head with one Dense + Dropout before the softmax.

    Args:
        input_shape: (height, width, channels) of the input images.
        num_classes: Number of softmax output classes.
        filters: Filter counts for the successive conv stages. (Annotation
            fixed: the default is deliberately a tuple — a list default would
            be a shared mutable — so the type now admits both.)
        dense_units: Width of the dense head.
        dropout_rate: Dropout probability before the output layer.

    Returns:
        An uncompiled functional `keras.Model` named "cnn".

    Example:
        model = cnn(input_shape=(32, 32, 3), num_classes=10)
        model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    """
    inputs = keras.Input(shape=input_shape, name="images")
    x = inputs
    for i, f in enumerate(filters):
        x = keras.layers.Conv2D(f, 3, activation="relu", padding="same", name=f"conv_{i}")(x)
        x = keras.layers.BatchNormalization(name=f"bn_{i}")(x)
        x = keras.layers.MaxPooling2D(2, name=f"pool_{i}")(x)
    # GAP instead of Flatten keeps the head small and input-size tolerant.
    x = keras.layers.GlobalAveragePooling2D(name="gap")(x)
    x = keras.layers.Dense(dense_units, activation="relu", name="dense")(x)
    x = keras.layers.Dropout(dropout_rate, name="dropout")(x)
    outputs = keras.layers.Dense(num_classes, activation="softmax", name="output")(x)
    return keras.Model(inputs, outputs, name="cnn")
def sequence_model(
    vocab_size: int,
    embed_dim: int = 128,
    rnn_units: int = 64,
    num_classes: int = 2,
    dropout_rate: float = 0.3,
) -> keras.Model:
    """
    Embedding + bidirectional LSTM classifier for variable-length token ids.

    `mask_zero=True` on the embedding treats token id 0 as padding, so the
    LSTM skips padded positions.

    Example:
        model = sequence_model(vocab_size=10000, embed_dim=128, rnn_units=64, num_classes=5)
    """
    token_ids = keras.Input(shape=(None,), dtype="int32", name="token_ids")
    embedded = keras.layers.Embedding(vocab_size, embed_dim, mask_zero=True)(token_ids)
    recurrent = keras.layers.Bidirectional(
        keras.layers.LSTM(rnn_units, dropout=dropout_rate)
    )(embedded)
    regularized = keras.layers.Dropout(dropout_rate)(recurrent)
    probs = keras.layers.Dense(num_classes, activation="softmax")(regularized)
    return keras.Model(token_ids, probs, name="sequence_classifier")
# ─────────────────────────────────────────────────────────────────────────────
# 3. Training
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class TrainConfig:
    """Hyperparameters and output locations consumed by `make_callbacks` and `train`."""
    epochs: int = 50                 # upper bound; EarlyStopping may end sooner
    batch_size: int = 32             # note: only honored when the caller batches with it
    learning_rate: float = 1e-3      # initial Adam LR (ReduceLROnPlateau may lower it)
    patience: int = 5                # EarlyStopping patience, in epochs
    checkpoint_dir: str = "checkpoints"   # where ModelCheckpoint writes *_best.keras
    tensorboard_dir: str = "logs"         # TensorBoard log root (per-model subdir)
    verbose: int = 1                 # passed through to model.fit
    class_weights: dict | None = None     # per-class loss weights, forwarded to fit(class_weight=...)
def make_callbacks(cfg: TrainConfig, model_name: str = "model") -> list[keras.callbacks.Callback]:
    """
    Build the standard callback set: EarlyStopping, ModelCheckpoint,
    ReduceLROnPlateau and TensorBoard, all monitoring val_loss.

    Example:
        callbacks = make_callbacks(TrainConfig(patience=10))
        model.fit(ds_train, callbacks=callbacks)
    """
    Path(cfg.checkpoint_dir).mkdir(parents=True, exist_ok=True)
    early_stop = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=cfg.patience,
        restore_best_weights=True,
        verbose=1,
    )
    checkpoint = keras.callbacks.ModelCheckpoint(
        filepath=f"{cfg.checkpoint_dir}/{model_name}_best.keras",
        monitor="val_loss",
        save_best_only=True,
        verbose=0,
    )
    # LR is halved after roughly half the early-stop patience without improvement.
    lr_plateau = keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=max(2, cfg.patience // 2),
        min_lr=1e-6,
        verbose=1,
    )
    board = keras.callbacks.TensorBoard(
        log_dir=f"{cfg.tensorboard_dir}/{model_name}",
        histogram_freq=0,
    )
    return [early_stop, checkpoint, lr_plateau, board]
def train(
    model: keras.Model,
    ds_train: tf.data.Dataset,
    ds_val: tf.data.Dataset | tuple[np.ndarray, np.ndarray],
    cfg: TrainConfig | None = None,
    loss: str = "sparse_categorical_crossentropy",
    metrics: list[str] | None = None,
    optimizer: str | keras.optimizers.Optimizer | None = None,
) -> keras.callbacks.History:
    """
    Compile a model and run `fit` with the standard callback set.

    When `optimizer` is None an Adam optimizer is created from
    `cfg.learning_rate`; `metrics` defaults to ["accuracy"].

    Example:
        history = train(model, ds_train, ds_val, cfg=TrainConfig(epochs=50))
        plot_history(history)
    """
    if cfg is None:
        cfg = TrainConfig()
    if optimizer is None:
        optimizer = keras.optimizers.Adam(learning_rate=cfg.learning_rate)
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics or ["accuracy"])
    return model.fit(
        ds_train,
        epochs=cfg.epochs,
        validation_data=ds_val,
        callbacks=make_callbacks(cfg, model_name=model.name),
        class_weight=cfg.class_weights,
        verbose=cfg.verbose,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 4. Evaluation helpers
# ─────────────────────────────────────────────────────────────────────────────
def evaluate_classifier(
    model: keras.Model,
    ds: tf.data.Dataset | tuple[np.ndarray, np.ndarray],
    class_names: list[str] | None = None,
) -> dict:
    """
    Evaluate a classification model, returning accuracy plus raw predictions.

    Accepts either a `(X, y)` tuple of arrays or a batched tf.data.Dataset
    yielding `(X_batch, y_batch)` pairs. Predicted classes are the argmax
    over the model's probability outputs.

    Example:
        results = evaluate_classifier(model, ds_test, class_names=["cat","dog"])
        print(f"Accuracy: {results['accuracy']:.3f}")
    """
    if isinstance(ds, tuple):
        X, y_true = ds
        y_prob = model.predict(X, verbose=0)
    else:
        # Collect labels and per-batch predictions, then stitch them together.
        batches = [(yb.numpy(), model.predict(Xb, verbose=0)) for Xb, yb in ds]
        y_true = np.concatenate([labels for labels, _ in batches])
        y_prob = np.concatenate([probs for _, probs in batches])
    y_pred = y_prob.argmax(axis=1)
    hits = y_pred == y_true.ravel()
    return {
        "accuracy": float(hits.mean()),
        "n_samples": len(y_true),
        "predictions": y_pred,
        "probabilities": y_prob,
    }
def history_summary(history: keras.callbacks.History, planned_epochs: int | None = None) -> dict:
    """
    Extract best-epoch metrics from a Keras training history.

    Args:
        history: History object returned by `model.fit` (only `history.history`
            is read).
        planned_epochs: The `epochs` value that was passed to `fit`. When
            given, `early_stopped` reports whether fewer epochs were recorded
            than planned. (The previous check compared `len(val_loss)` with
            `len(loss)`, which Keras always records at equal length, so it
            could never be True.)

    Returns:
        Dict with best_epoch (1-based), best_val_loss, best_val_accuracy
        (0.0 when accuracy was not tracked), final_epoch, early_stopped;
        empty dict when no validation loss was recorded.

    Example:
        summary = history_summary(history)
        print(f"Best val_acc: {summary['best_val_accuracy']:.3f} at epoch {summary['best_epoch']}")
    """
    h = history.history
    val_loss = h.get("val_loss", [])
    if not val_loss:
        return {}
    best_epoch = int(np.argmin(val_loss))
    # Guard missing accuracy: the old `h.get("val_accuracy", [0])[best_epoch]`
    # raised IndexError whenever accuracy was absent and best_epoch > 0.
    val_acc = h.get("val_accuracy") or []
    best_val_accuracy = round(float(val_acc[best_epoch]), 4) if best_epoch < len(val_acc) else 0.0
    return {
        "best_epoch": best_epoch + 1,
        "best_val_loss": round(float(val_loss[best_epoch]), 4),
        "best_val_accuracy": best_val_accuracy,
        "final_epoch": len(val_loss),
        "early_stopped": planned_epochs is not None and len(val_loss) < planned_epochs,
    }
# ─────────────────────────────────────────────────────────────────────────────
# 5. Model persistence
# ─────────────────────────────────────────────────────────────────────────────
def save_model(model: keras.Model, path: str | Path, fmt: str = "keras") -> Path:
    """
    Save a Keras model to disk.

    Args:
        model: Model to persist.
        path: Destination file (`.keras` format) or directory (SavedModel).
            A missing suffix in "keras" mode gets `.keras` appended.
        fmt: "keras" (default, `model.save`) or "savedmodel" (`model.export`).

    Returns:
        The path actually written. (Previously the suffix-less input path was
        returned even when `.keras` had been appended, so the returned path
        did not exist on disk.)

    Example:
        save_model(model, "artifacts/classifier.keras")
        save_model(model, "artifacts/serving", fmt="savedmodel")
    """
    p = Path(path)
    p.parent.mkdir(parents=True, exist_ok=True)
    if fmt == "savedmodel":
        model.export(str(p))
        return p
    # Resolve the target up front so the returned path matches what was saved.
    target = p if p.suffix else p.with_suffix(".keras")
    model.save(str(target))
    return target
def load_model(path: str | Path, **kwargs) -> keras.Model:
    """
    Reload a model previously written by `save_model`.

    Extra keyword arguments (e.g. `custom_objects`, `compile`) are forwarded
    to `keras.models.load_model`.

    Example:
        model = load_model("artifacts/classifier.keras")
    """
    model = keras.models.load_model(str(path), **kwargs)
    return model
def export_tflite(
    model: keras.Model,
    path: str | Path,
    quantize: bool = False,
) -> Path:
    """
    Convert a Keras model to TensorFlow Lite and write the flatbuffer to disk.

    Example:
        tflite_path = export_tflite(model, "mobile/model.tflite")
        tflite_path = export_tflite(model, "mobile/model_int8.tflite", quantize=True)
    """
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    if quantize:
        # Default post-training optimization set (weight quantization).
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
    payload = converter.convert()
    out = Path(path)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_bytes(payload)
    log.info("TFLite model: %s (%.1f KB)", out, len(payload) / 1024)
    return out
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test the whole pipeline end-to-end on synthetic data:
    # build dataset -> build MLP -> train 3 epochs -> summarize -> evaluate.
    rng = np.random.default_rng(42)  # fixed seed for reproducible demo data
    print("=== TensorFlow / Keras demo ===")
    print(f" TF version: {tf.__version__}")
    # Synthetic binary classification data: label = sign of the sum of the
    # first 5 features, so the task is learnable but not trivial.
    X = rng.normal(0, 1, (1000, 20)).astype(np.float32)
    y = (X[:, :5].sum(axis=1) > 0).astype(np.int32)
    # 800/200 train/validation split.
    ds_train = numpy_dataset(X[:800], y[:800], batch_size=32)
    ds_val = numpy_dataset(X[800:], y[800:], batch_size=32, shuffle=False)
    print("\n--- MLP build ---")
    model = mlp(input_dim=20, output_dim=2, hidden_dims=[64, 32], dropout_rate=0.2)
    # Only echo the header and parameter-count lines of the summary.
    model.summary(print_fn=lambda s: print(f" {s}") if "Total" in s or "Layer" in s else None)
    print("\n--- compile + fit (3 epochs for demo) ---")
    # patience > epochs, so EarlyStopping never triggers in this short run.
    cfg = TrainConfig(epochs=3, batch_size=32, patience=10, verbose=0)
    history = train(model, ds_train, ds_val, cfg=cfg)
    summary = history_summary(history)
    print(f" Best val_loss: {summary.get('best_val_loss','?')} "
          f"val_accuracy: {summary.get('best_val_accuracy','?')}")
    print("\n--- evaluate ---")
    # Evaluate on the held-out arrays directly (tuple path of evaluate_classifier).
    results = evaluate_classifier(model, (X[800:], y[800:]))
    print(f" Accuracy: {results['accuracy']:.3f} n={results['n_samples']}")
    print("\n=== done ===")
For the PyTorch alternative — PyTorch uses dynamic computation graphs enabling more Pythonic debugging (inspect tensor values mid-forward pass), is preferred in research and academia, and has a larger ecosystem of cutting-edge model implementations (Hugging Face Transformers defaults to PyTorch); TensorFlow/Keras provides end-to-end deployment tooling (TFLite, TF Serving, TF.js), is tightly integrated with Google Cloud Vertex AI, and has a simpler high-level Keras API for production training pipelines — use PyTorch for research, fine-tuning HuggingFace models, and when you want maximum Python debugging flexibility, TensorFlow when you need TFLite for mobile, TF Serving for production inference, or Vertex AI integration. For the JAX alternative — JAX provides NumPy-compatible array operations with automatic differentiation, JIT compilation via XLA, and both SPMD multi-device and TPU support; TensorFlow Keras provides a higher-level training API with callbacks, fit(), and built-in serialization — use JAX/Flax for research requiring custom training loops, TPU scaling, and functional programming style, TensorFlow for teams that want a batteries-included training API and Google’s production deployment stack. The Claude Skills 360 bundle includes TensorFlow skill sets covering numpy_dataset()/make_augmentation_layer() data pipelines, mlp()/cnn()/sequence_model() model builders, TrainConfig/make_callbacks()/train() training loop, evaluate_classifier()/history_summary() evaluation, and save_model()/load_model()/export_tflite() persistence. Start with the free tier to try deep learning model training and TensorFlow pipeline code generation.