Rate limiting protects your service and enables fair resource allocation. Without it, a single misbehaving client can degrade service for everyone. The algorithm choice matters: token bucket for bursty traffic, sliding window for smooth limiting, fixed window for simple per-minute quotas. Claude Code implements these correctly — handling the distributed case, edge conditions at window boundaries, and the headers that tell clients what to do.
Algorithm Comparison
| Algorithm | Burst Allowed? | Memory | Accuracy | Best For |
|---|---|---|---|---|
| Fixed Window | Yes (at boundary) | Low | Low | Simple per-minute quotas |
| Sliding Window Log | No | High | Exact | Precise limiting, low traffic |
| Sliding Window Counter | Small | Low | ~5% error | High traffic, some burst OK |
| Token Bucket | Yes (configurable) | Low | Exact | APIs with bursty patterns |
| Leaky Bucket | No | Low | Exact | Smooth output rate |
Most API use cases: Token bucket (allows bursts up to the bucket size) or Sliding Window Counter (Redis-native with INCR + EXPIRE).
Redis Sliding Window Log
Implement rate limiting for our API: 100 requests per minute per user.
Multiple servers — must work distributed.
// src/lib/rateLimiter.ts
import { Redis } from 'ioredis';
/**
 * Result of a rate-limit check, shared by all limiter implementations.
 * Exported so consumers (e.g. the middleware's applyRateLimitHeaders)
 * can reference it — the original was module-private, which broke them.
 */
export interface RateLimitResult {
  /** Whether this request is allowed under the current limit. */
  allowed: boolean;
  /** Maximum number of requests permitted in the window. */
  limit: number;
  /** Requests still available in the current window (0 when denied). */
  remaining: number;
  /** When the current window resets. */
  resetAt: Date;
  /** Seconds until the next request is allowed; only present when denied. */
  retryAfter?: number;
}
// Atomic Lua script — runs on Redis server, no race conditions
// Sliding-window *log*: every request is stored as a member of a sorted set
// (one set per identifier), scored by arrival time in ms. One round trip per
// check, and check-and-record cannot race across app servers.
//   KEYS[1] = the per-identifier sorted-set key
//   ARGV    = [ now (ms), window (ms), limit, unique request id ]
// Returns { allowed (1/0), count in window, oldest score or '0' }.
// Exact limiting (no window-boundary error) at the cost of O(limit) memory
// per key; EXPIRE lets idle keys clean themselves up after one window.
const SLIDING_WINDOW_SCRIPT = `
local key = KEYS[1]
local now = tonumber(ARGV[1])
local window = tonumber(ARGV[2])
local limit = tonumber(ARGV[3])
local request_id = ARGV[4]
-- Remove requests outside the window
redis.call('ZREMRANGEBYSCORE', key, 0, now - window)
-- Count current requests in window
local count = redis.call('ZCARD', key)
if count >= limit then
-- Denied — return count and oldest request time
local oldest = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')
return {0, count, oldest[2] or '0'}
end
-- Add this request
redis.call('ZADD', key, now, request_id)
redis.call('EXPIRE', key, math.ceil(window / 1000))
return {1, count + 1, '0'}
`;
/**
 * Distributed sliding-window-log rate limiter backed by a Redis sorted set.
 * Safe across multiple app servers: the whole check-and-record step runs
 * atomically inside a single Lua script on the Redis server.
 */
export class SlidingWindowRateLimiter {
  private script: string;

  constructor(
    private redis: Redis,
    private limit: number,
    private windowMs: number,
  ) {
    this.script = SLIDING_WINDOW_SCRIPT;
  }

  /**
   * Record one request for `identifier` if it is under the limit.
   * Returns the limit state either way; denial is a result, not an error.
   */
  async check(identifier: string): Promise<RateLimitResult> {
    const redisKey = `ratelimit:${identifier}`;
    const nowMs = Date.now();
    // Unique member per request so two requests in the same ms both count.
    const member = `${nowMs}-${Math.random().toString(36).slice(2)}`;

    const reply = await this.redis.eval(
      this.script,
      1,
      redisKey,
      nowMs.toString(),
      this.windowMs.toString(),
      this.limit.toString(),
      member,
    ) as [number, number, string];
    const [allowed, count, oldestTimestamp] = reply;

    const resetAt = new Date(nowMs + this.windowMs);

    if (allowed) {
      return {
        allowed: true,
        limit: this.limit,
        remaining: this.limit - count,
        resetAt,
      };
    }

    // Denied: the next slot opens when the oldest request ages out of the window.
    const oldestMs = parseFloat(oldestTimestamp);
    return {
      allowed: false,
      limit: this.limit,
      remaining: 0,
      resetAt,
      retryAfter: Math.ceil((oldestMs + this.windowMs - nowMs) / 1000),
    };
  }
}
Express Middleware
Add rate limiting middleware to the Express API.
Different limits for different endpoints — auth endpoints stricter.
// src/middleware/rateLimiting.ts
import { Request, Response, NextFunction } from 'express';
import { redis } from '../lib/redis';
import { SlidingWindowRateLimiter } from '../lib/rateLimiter';
import type { RateLimitResult } from '../lib/rateLimiter';
// Shared limiter instances, one per traffic class. All are sliding-window
// over a 60s window; auth and expensive endpoints get a much tighter budget
// than the global default.
const limiters = {
  global: new SlidingWindowRateLimiter(redis, 100, 60_000), // 100/min per IP
  auth: new SlidingWindowRateLimiter(redis, 10, 60_000), // 10/min for auth
  expensive: new SlidingWindowRateLimiter(redis, 10, 60_000), // 10/min for slow endpoints
};
// Authenticated traffic is keyed per user id; anonymous traffic per client IP,
// falling back to the raw socket address (or 'unknown') when Express has none.
function getRateLimitIdentifier(req: Request, scope: string): string {
  const clientIp = req.ip ?? req.socket.remoteAddress ?? 'unknown';
  const subject = req.user?.id ?? clientIp;
  return `${scope}:${subject}`;
}
/**
 * Attach rate-limit response headers: the de-facto X-RateLimit-* convention
 * (standardized form in the IETF "RateLimit header fields" draft), plus the
 * Retry-After header (RFC 6585 recommends it with 429) when denied.
 */
function applyRateLimitHeaders(res: Response, result: RateLimitResult): void {
  const resetEpochSeconds = Math.floor(result.resetAt.getTime() / 1000);
  res.set({
    'X-RateLimit-Limit': String(result.limit),
    'X-RateLimit-Remaining': String(result.remaining),
    'X-RateLimit-Reset': String(resetEpochSeconds),
  });
  if (result.retryAfter !== undefined) {
    res.set('Retry-After', String(result.retryAfter));
  }
}
/**
 * Express middleware factory enforcing the named limiter ('global' default).
 *
 * Fix: the original async handler let a limiter/Redis failure reject the
 * promise, which Express 4 does not catch — the request would hang and the
 * process would log an unhandled rejection. We now fail open: when the
 * limiter is unreachable the request proceeds, because rate limiting exists
 * to protect capacity and must not become a single point of failure.
 */
export function rateLimit(limiterKey: keyof typeof limiters = 'global') {
  return async (req: Request, res: Response, next: NextFunction) => {
    try {
      const identifier = getRateLimitIdentifier(req, limiterKey);
      const result = await limiters[limiterKey].check(identifier);
      applyRateLimitHeaders(res, result);
      if (!result.allowed) {
        return res.status(429).json({
          error: 'Too Many Requests',
          retryAfter: result.retryAfter,
          message: `Rate limit exceeded. Try again in ${result.retryAfter} seconds.`,
        });
      }
    } catch (err) {
      // Fail open on limiter errors rather than blocking all traffic.
      console.error('rate limiter unavailable, failing open', err);
    }
    next();
  };
}
// Usage in routes
// router.post('/auth/login', rateLimit('auth'), loginHandler);
// router.get('/reports/generate', rateLimit('expensive'), reportHandler);
// app.use(rateLimit('global')); // Applied to all routes
Token Bucket for Burst Handling
Our customers need to burst: loading 1,000 products at signup, then dropping to low sustained traffic. A token bucket fits this pattern better than a sliding window.
// Token bucket: refills at constant rate, allows bursts up to bucket capacity
// Atomic Lua script. State per key is a hash { tokens, last_refill (ms) };
// a missing key is treated as a full bucket (tokens = capacity).
//   KEYS[1] = the per-identifier bucket key
//   ARGV    = [ capacity, refill rate (tokens/sec), tokens requested, now (ms) ]
// Returns { allowed (1/0), whole tokens remaining, seconds until enough tokens }.
// EXPIRE is set to time-to-refill-full plus 60s slack so idle buckets
// self-clean without ever expiring mid-use.
const TOKEN_BUCKET_SCRIPT = `
local key = KEYS[1]
local capacity = tonumber(ARGV[1])
local refill_rate = tonumber(ARGV[2]) -- tokens per second
local requested = tonumber(ARGV[3])
local now = tonumber(ARGV[4])
-- Get current bucket state
local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
local tokens = tonumber(bucket[1]) or capacity
local last_refill = tonumber(bucket[2]) or now
-- Calculate token refill since last check
local elapsed = (now - last_refill) / 1000 -- convert ms to seconds
local new_tokens = math.min(capacity, tokens + elapsed * refill_rate)
if new_tokens < requested then
-- Not enough tokens
local wait_time = (requested - new_tokens) / refill_rate
redis.call('HMSET', key, 'tokens', new_tokens, 'last_refill', now)
redis.call('EXPIRE', key, math.ceil(capacity / refill_rate) + 60)
return {0, math.floor(new_tokens), math.ceil(wait_time)}
end
-- Consume tokens
local remaining = new_tokens - requested
redis.call('HMSET', key, 'tokens', remaining, 'last_refill', now)
redis.call('EXPIRE', key, math.ceil(capacity / refill_rate) + 60)
return {1, math.floor(remaining), 0}
`;
/**
 * Distributed token-bucket limiter: a burst of up to `capacity` requests may
 * be served at once, after which sustained throughput is capped at
 * `refillRate` tokens/second. Refill-and-consume runs atomically in a Lua
 * script on the Redis server, so it is safe across multiple app servers.
 */
export class TokenBucketRateLimiter {
  constructor(
    private redis: Redis,
    private capacity: number, // max burst size
    private refillRate: number, // tokens per second
  ) {}

  /** Try to take `tokens` (default 1) from the bucket for `identifier`. */
  async consume(identifier: string, tokens = 1): Promise<RateLimitResult> {
    const bucketKey = `tokenbucket:${identifier}`;
    const nowMs = Date.now();

    const reply = await this.redis.eval(
      TOKEN_BUCKET_SCRIPT,
      1,
      bucketKey,
      String(this.capacity),
      String(this.refillRate),
      String(tokens),
      String(nowMs),
    ) as [number, number, number];
    const [allowed, remaining, waitSeconds] = reply;

    const denied = !allowed;
    return {
      allowed: !denied,
      limit: this.capacity,
      remaining,
      // When denied, this is when enough tokens will have refilled.
      resetAt: new Date(nowMs + waitSeconds * 1000),
      retryAfter: denied ? waitSeconds : undefined,
    };
  }
}
Tiered Rate Limits by Plan
Free users: 100 requests/min
Pro users: 1000 requests/min
Enterprise: 10000 requests/min, plus per-endpoint overrides
// Per-plan quotas. Typed Record<string, ...> (rather than a union key)
// because the plan name arrives from req.user at runtime; unknown plans fall
// back to the free-tier limit in tieredRateLimit.
const PLAN_LIMITS: Record<string, { requestsPerMinute: number }> = {
  free: { requestsPerMinute: 100 },
  pro: { requestsPerMinute: 1000 },
  enterprise: { requestsPerMinute: 10000 },
};
/**
 * Express middleware applying per-plan rate limits (free/pro/enterprise).
 *
 * Fixes over the original:
 * - A thrown limiter/Redis error in the async handler was never caught, so
 *   Express 4 left the request hanging with an unhandled rejection; we now
 *   fail open instead.
 * - The 429 body now includes retryAfter, consistent with rateLimit().
 *
 * Limiter instances are cached per distinct limit value, so the Map holds at
 * most one entry per plan tier.
 */
export function tieredRateLimit() {
  const limiters = new Map<number, SlidingWindowRateLimiter>();
  return async (req: Request, res: Response, next: NextFunction) => {
    try {
      const plan = req.user?.plan ?? 'free';
      const limit = PLAN_LIMITS[plan]?.requestsPerMinute ?? 100;
      // Lazily create one shared limiter per distinct limit value.
      if (!limiters.has(limit)) {
        limiters.set(limit, new SlidingWindowRateLimiter(redis, limit, 60_000));
      }
      const identifier = `plan:${req.user?.id ?? req.ip}`;
      const result = await limiters.get(limit)!.check(identifier);
      applyRateLimitHeaders(res, result);
      if (!result.allowed) {
        return res.status(429).json({
          error: 'Rate limit exceeded',
          plan,
          limit,
          retryAfter: result.retryAfter,
          upgrade_url: plan !== 'enterprise' ? '/pricing' : undefined,
        });
      }
    } catch (err) {
      // Fail open: a broken limiter must not take the whole API down.
      console.error('tiered rate limiter unavailable, failing open', err);
    }
    next();
  };
}
For the Redis infrastructure these patterns rely on, including Redis clusters and high availability, see the Redis guide. For applying rate limiting at the API gateway level before requests reach your services, the API gateway guide covers gateway-level rate limiting. The Claude Skills 360 bundle includes backend infrastructure skill sets covering rate limiting, circuit breaking, and resilience patterns. Start with the free tier to try rate limiting implementation.