Production deployment pipelines need more than running tests and pushing containers. Safe deployments require staged rollouts, automated health checks, and rollback triggers that fire before users notice an outage. Claude Code generates deployment pipelines with production safety in mind: canary analysis, traffic splitting, automated rollback conditions, and the gate logic between environments.
This guide covers advanced CI/CD with Claude Code: multi-stage Docker builds, deployment strategies, canary releases, and automated rollbacks.
Multi-Stage Docker Builds
Optimize the Docker build for a Node.js API.
Current image is 1.2GB. Target: < 200MB, fast cache hits.
# Dockerfile
# Three-stage build: prod deps -> build -> minimal runtime (~120MB target).
# Stage 1: Install production dependencies (cached unless lock file changes)
FROM node:20-alpine AS deps
WORKDIR /app
# Copy only manifest/lock files first — cache this layer if they don't change
COPY package.json package-lock.json ./
# --omit=dev replaces the deprecated --only=production (npm 8+)
RUN npm ci --omit=dev --ignore-scripts
# Stage 2: Build (cached unless source changes)
FROM node:20-alpine AS builder
WORKDIR /app
COPY package.json package-lock.json ./
# Full install here: devDependencies are needed for the build toolchain
RUN npm ci --ignore-scripts
COPY . .
# --omit=dev replaces the deprecated --production flag on prune
RUN npm run build && npm prune --omit=dev
# Stage 3: Runtime image (minimal)
FROM node:20-alpine AS runner
WORKDIR /app
ENV NODE_ENV=production
# Security: don't run as root
RUN addgroup --system --gid 1001 appgroup && \
    adduser --system --uid 1001 --ingroup appgroup appuser
# Copy only what's needed to run
COPY --from=builder --chown=appuser:appgroup /app/dist ./dist
COPY --from=deps --chown=appuser:appgroup /app/node_modules ./node_modules
COPY --chown=appuser:appgroup package.json ./
USER appuser
# Health check: exit 1 on non-200 responses AND on connection errors.
# (Without the 'error' handler, a refused connection fails only via an
# unhandled-event crash instead of a clean non-zero exit.)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
  CMD node -e "require('http').get('http://localhost:8080/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"
EXPOSE 8080
CMD ["node", "dist/main.js"]
Result: ~120MB runtime image (down from 1.2GB), predictable layer caching.
Full Deployment Pipeline
Build a GitHub Actions workflow that:
- Tests on every PR
- Deploys to staging automatically on merge to main
- Requires manual approval before production
- Runs smoke tests after each deployment
# .github/workflows/deploy.yml
# Pipeline: test every PR -> build+push image on main -> auto-deploy staging
# -> manual approval (production environment) -> canary -> full rollout.
name: Deploy
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
env:
  IMAGE: ghcr.io/${{ github.repository }}
  IMAGE_TAG: ${{ github.sha }}
jobs:
  test:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:16-alpine
        env:
          POSTGRES_PASSWORD: test
          POSTGRES_DB: testdb
        options: >-
          --health-cmd pg_isready
          --health-interval 5s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
      - run: npm ci
      - run: npm run typecheck
      - run: npm run lint
      - run: npm test -- --coverage
        env:
          DATABASE_URL: postgresql://postgres:test@localhost:5432/testdb
  build:
    needs: test
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    # Pushing to GHCR with GITHUB_TOKEN requires the packages: write scope.
    permissions:
      contents: read
      packages: write
    outputs:
      image: ${{ env.IMAGE }}:${{ env.IMAGE_TAG }}
    steps:
      - uses: actions/checkout@v4
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: |
            ${{ env.IMAGE }}:${{ env.IMAGE_TAG }}
            ${{ env.IMAGE }}:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
  deploy-staging:
    needs: build
    runs-on: ubuntu-latest
    environment: staging
    steps:
      # Checkout + Node setup are required for the smoke-test step below
      # (the job previously ran `npm run test:smoke` with no checkout).
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
      - run: npm ci
      - name: Deploy to staging
        # NOTE(review): assumes kubectl is already configured on the runner
        # (self-hosted runner or a prior kubeconfig step) — confirm.
        # Deprecated `--record` flag removed.
        run: |
          kubectl set image deployment/api api=${{ needs.build.outputs.image }} \
            --namespace=staging
          kubectl rollout status deployment/api --namespace=staging --timeout=5m
      - name: Run smoke tests
        run: npm run test:smoke -- --env=staging
        env:
          SMOKE_TEST_URL: https://staging.api.example.com
  deploy-production:
    # `needs.build.outputs.image` is only available when `build` is listed in
    # `needs`; depending on deploy-staging alone left the output empty.
    needs: [build, deploy-staging]
    runs-on: ubuntu-latest
    environment:
      name: production
      url: https://api.example.com # GitHub requires manual approval for this env
    steps:
      - name: Deploy canary (10% traffic)
        # `--set` is a Helm flag and is not accepted by kubectl apply.
        # Apply the manifest, then point the canary at the new image.
        run: |
          kubectl apply -f k8s/canary-deployment.yaml --namespace=production
          kubectl set image deployment/api-canary api=${{ needs.build.outputs.image }} \
            --namespace=production
          kubectl rollout status deployment/api-canary --namespace=production --timeout=5m
      - name: Monitor canary (5 minutes)
        run: |
          sleep 300 # Let the canary serve real traffic for 5 minutes
          # Check error rate on canary pods.
          # NOTE(review): http_errors_total is a monotonic counter; comparing
          # it to 0.01 only makes sense if the app exposes a ratio — verify.
          ERROR_RATE=$(kubectl exec deploy/api-canary --namespace=production -- \
            curl -s http://localhost:9090/metrics | \
            grep 'http_errors_total' | awk '{print $2}')
          if (( $(echo "${ERROR_RATE:-0} > 0.01" | bc) )); then
            echo "Canary error rate too high: $ERROR_RATE"
            kubectl delete -f k8s/canary-deployment.yaml --namespace=production
            exit 1
          fi
      - name: Full production rollout
        run: |
          kubectl set image deployment/api api=${{ needs.build.outputs.image }} \
            --namespace=production
          kubectl rollout status deployment/api --namespace=production --timeout=10m
          kubectl delete -f k8s/canary-deployment.yaml --namespace=production --ignore-not-found
Automated Rollback
When production latency p99 exceeds 2 seconds, automatically
roll back the deployment. Set up the monitoring and rollback trigger.
# .github/workflows/production-monitor.yml
# Watches p99 latency for 15 minutes after every successful Deploy run and
# rolls production back automatically if the threshold is breached.
name: Production Health Monitor
on:
  workflow_run:
    workflows: [Deploy]
    types: [completed]
jobs:
  monitor-deployment:
    # Only monitor deployments that actually succeeded.
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    steps:
      - name: Monitor for 15 minutes post-deploy
        run: |
          DEPLOYMENT_SHA="${{ github.event.workflow_run.head_sha }}"
          MAX_LATENCY_MS=2000
          CHECK_INTERVAL=60
          DURATION=900 # 15 minutes
          for i in $(seq 1 $((DURATION / CHECK_INTERVAL))); do
            sleep $CHECK_INTERVAL
            # Query Prometheus for p99 latency over the last 2 minutes
            P99=$(curl -s "https://prometheus.internal/api/v1/query" \
              --data-urlencode 'query=histogram_quantile(0.99, rate(http_request_duration_ms_bucket[2m]))' \
              | jq -r '.data.result[0].value[1]')
            # Guard: jq prints "null" (or nothing) when the query returns no
            # series; `bc` would choke on the comparison below otherwise.
            if [ -z "$P99" ] || [ "$P99" = "null" ]; then
              echo "Check $i: no p99 data returned — skipping"
              continue
            fi
            echo "Check $i: p99=${P99}ms"
            if (( $(echo "$P99 > $MAX_LATENCY_MS" | bc) )); then
              echo "❌ p99 latency ${P99}ms exceeds ${MAX_LATENCY_MS}ms — rolling back"
              # Roll back to the previous Kubernetes revision
              kubectl rollout undo deployment/api --namespace=production
              # Alert Slack. Build the payload with jq so interpolated values
              # can't break the JSON (the original mixed-quoting was fragile).
              jq -n --arg text "🚨 Auto-rollback triggered: p99=${P99}ms exceeded ${MAX_LATENCY_MS}ms on ${DEPLOYMENT_SHA}" '{text: $text}' \
                | curl -X POST "${{ secrets.SLACK_WEBHOOK }}" -H 'Content-Type: application/json' -d @-
              exit 1
            fi
          done
          echo "✅ Deployment healthy for 15 minutes"
Blue-Green Deployments
Implement blue-green deployment with instant cutover and
no traffic loss during switch.
// scripts/blue-green-deploy.ts
import { execSync } from 'child_process';

/** Runs a shell command and returns trimmed stdout. */
function run(cmd: string): string {
  return execSync(cmd).toString().trim();
}

/** Awaitable sleep. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Blue-green deploy: update the idle color, smoke-test it directly, then flip
 * the service selector for an instant, zero-downtime cutover. The previously
 * active color stays warm for `rollbackWindowMs` before being scaled to zero.
 *
 * @param newImage fully-qualified container image reference to roll out
 *   (NOTE(review): interpolated into shell commands — validate/escape if it
 *   can ever come from untrusted input)
 * @param rollbackWindowMs how long to keep the old color available for quick
 *   rollback before scaling it down (default: 1 hour)
 */
async function blueGreenDeploy(newImage: string, rollbackWindowMs = 3600 * 1000): Promise<void> {
  // Determine which color currently receives traffic (blue on first deploy).
  const activeColor =
    run(`kubectl get service api -n production -o jsonpath='{.spec.selector.color}'`) || 'blue';
  const inactiveColor = activeColor === 'blue' ? 'green' : 'blue';
  console.log(`Active: ${activeColor}, deploying to: ${inactiveColor}`);

  // 1. Update the inactive deployment (receives no traffic yet).
  run(`kubectl set image deployment/api-${inactiveColor} api=${newImage} -n production`);
  run(`kubectl rollout status deployment/api-${inactiveColor} -n production --timeout=5m`);

  // 2. Smoke-test the inactive deployment directly, bypassing the main service.
  const inactiveUrl = run(
    `kubectl get service api-${inactiveColor}-internal -n production -o jsonpath='{.status.loadBalancer.ingress[0].ip}'`
  );
  run(`npm run test:smoke -- --url=http://${inactiveUrl}:8080`);
  console.log('Smoke tests passed');

  // 3. Switch traffic (instant cutover — no downtime).
  run(`kubectl patch service api -n production -p '{"spec":{"selector":{"color":"${inactiveColor}"}}}'`);
  console.log(`✅ Traffic switched to ${inactiveColor}`);

  // 4. Keep the old color for quick rollback, then scale it down.
  // BUG FIX: the original used a bare setTimeout, but the process exits as
  // soon as this promise resolves, so the timer never fired and the old
  // deployment was never scaled down. Awaiting the delay keeps the process
  // alive for the rollback window and then performs the scale-down.
  await delay(rollbackWindowMs);
  run(`kubectl scale deployment/api-${activeColor} --replicas=0 -n production`);
  console.log(`Scaled down ${activeColor} deployment`);
}
For GitOps management of these pipelines via ArgoCD, see the GitOps guide. For load testing the deployments before they go live, see the load testing guide. The Claude Skills 360 bundle includes CI/CD skill sets covering deployment strategies, pipeline optimization, and zero-downtime deployments. Start with the free tier to try deployment pipeline code generation.