Rate limiting protects your service and enables fair resource allocation. Without it, a single misbehaving client can degrade service for everyone. The algorithm choice matters: token bucket for bursty traffic, sliding window for smooth limiting, fixed window for simple per-minute quotas. Claude Code implements these correctly — handling the distributed case, edge conditions at window boundaries, and the headers that tell clients what to do.
Algorithm Comparison
| Algorithm | Burst Allowed? | Memory | Accuracy | Best For |
|---|---|---|---|---|
| Fixed Window | Yes (at boundary) | Low | Low | Simple per-minute quotas |
| Sliding Window Log | No | High | Exact | Precise limiting, low traffic |
| Sliding Window Counter | Small | Low | ~5% error | High traffic, some burst OK |
| Token Bucket | Yes (configurable) | Low | Exact | APIs with bursty patterns |
| Leaky Bucket | No | Low | Exact | Smooth output rate |
Most API use cases: Token bucket (allows bursts up to the bucket size) or Sliding Window Counter (Redis-native with INCR + EXPIRE).
Redis Sliding Window Log
Implement rate limiting for our API: 100 requests per minute per user.
Multiple servers — must work distributed.
// src/lib/rateLimiter.ts
import { Redis } from 'ioredis';
/**
 * Result of a rate-limit check, shared by all limiter implementations.
 * Exported so consumers (e.g. the middleware's applyRateLimitHeaders)
 * can reference it — the original was module-private, which broke them.
 */
export interface RateLimitResult {
  /** Whether this request is allowed under the current limit. */
  allowed: boolean;
  /** Maximum number of requests permitted in the window. */
  limit: number;
  /** Requests still available in the current window (0 when denied). */
  remaining: number;
  /** When the current window resets. */
  resetAt: Date;
  /** Seconds until the next request is allowed; only present when denied. */
  retryAfter?: number;
}
// Atomic Lua script — runs on Redis server, no race conditions
// Sliding-window *log*: every request is stored as a member of a sorted set
// (one set per identifier), scored by arrival time in ms. One round trip per
// check, and check-and-record cannot race across app servers.
//   KEYS[1] = the per-identifier sorted-set key
//   ARGV    = [ now (ms), window (ms), limit, unique request id ]
// Returns { allowed (1/0), count in window, oldest score or '0' }.
// Exact limiting (no window-boundary error) at the cost of O(limit) memory
// per key; EXPIRE lets idle keys clean themselves up after one window.
const SLIDING_WINDOW_SCRIPT = `
local key = KEYS[1]
local now = tonumber(ARGV[1])
local window = tonumber(ARGV[2])
local limit = tonumber(ARGV[3])
local request_id = ARGV[4]
-- Remove requests outside the window
redis.call('ZREMRANGEBYSCORE', key, 0, now - window)
-- Count current requests in window
local count = redis.call('ZCARD', key)
if count >= limit then
-- Denied — return count and oldest request time
local oldest = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')
return {0, count, oldest[2] or '0'}
end
-- Add this request
redis.call('ZADD', key, now, request_id)
redis.call('EXPIRE', key, math.ceil(window / 1000))
return {1, count + 1, '0'}
`;
/**
 * Distributed sliding-window-log rate limiter backed by a Redis sorted set.
 * Safe across multiple app servers: the whole check-and-record step runs
 * atomically inside a single Lua script on the Redis server.
 */
export class SlidingWindowRateLimiter {
  private script: string;

  constructor(
    private redis: Redis,
    private limit: number,
    private windowMs: number,
  ) {
    this.script = SLIDING_WINDOW_SCRIPT;
  }

  /**
   * Record one request for `identifier` if it is under the limit.
   * Returns the limit state either way; denial is a result, not an error.
   */
  async check(identifier: string): Promise<RateLimitResult> {
    const redisKey = `ratelimit:${identifier}`;
    const nowMs = Date.now();
    // Unique member per request so two requests in the same ms both count.
    const member = `${nowMs}-${Math.random().toString(36).slice(2)}`;

    const reply = await this.redis.eval(
      this.script,
      1,
      redisKey,
      nowMs.toString(),
      this.windowMs.toString(),
      this.limit.toString(),
      member,
    ) as [number, number, string];
    const [allowed, count, oldestTimestamp] = reply;

    const resetAt = new Date(nowMs + this.windowMs);

    if (allowed) {
      return {
        allowed: true,
        limit: this.limit,
        remaining: this.limit - count,
        resetAt,
      };
    }

    // Denied: the next slot opens when the oldest request ages out of the window.
    const oldestMs = parseFloat(oldestTimestamp);
    return {
      allowed: false,
      limit: this.limit,
      remaining: 0,
      resetAt,
      retryAfter: Math.ceil((oldestMs + this.windowMs - nowMs) / 1000),
    };
  }
}
Express Middleware
Add rate limiting middleware to the Express API.
Different limits for different endpoints — auth endpoints stricter.
// src/middleware/rateLimiting.ts
import { Request, Response, NextFunction } from 'express';
import { redis } from '../lib/redis';
import { SlidingWindowRateLimiter } from '../lib/rateLimiter';
import type { RateLimitResult } from '../lib/rateLimiter';
// Shared limiter instances, one per traffic class. All are sliding-window
// over a 60s window; auth and expensive endpoints get a much tighter budget
// than the global default.
const limiters = {
  global: new SlidingWindowRateLimiter(redis, 100, 60_000), // 100/min per IP
  auth: new SlidingWindowRateLimiter(redis, 10, 60_000), // 10/min for auth
  expensive: new SlidingWindowRateLimiter(redis, 10, 60_000), // 10/min for slow endpoints
};
// Authenticated traffic is keyed per user id; anonymous traffic per client IP,
// falling back to the raw socket address (or 'unknown') when Express has none.
function getRateLimitIdentifier(req: Request, scope: string): string {
  const clientIp = req.ip ?? req.socket.remoteAddress ?? 'unknown';
  const subject = req.user?.id ?? clientIp;
  return `${scope}:${subject}`;
}
/**
 * Attach rate-limit response headers: the de-facto X-RateLimit-* convention
 * (standardized form in the IETF "RateLimit header fields" draft), plus the
 * Retry-After header (RFC 6585 recommends it with 429) when denied.
 */
function applyRateLimitHeaders(res: Response, result: RateLimitResult): void {
  const resetEpochSeconds = Math.floor(result.resetAt.getTime() / 1000);
  res.set({
    'X-RateLimit-Limit': String(result.limit),
    'X-RateLimit-Remaining': String(result.remaining),
    'X-RateLimit-Reset': String(resetEpochSeconds),
  });
  if (result.retryAfter !== undefined) {
    res.set('Retry-After', String(result.retryAfter));
  }
}
/**
 * Express middleware factory enforcing the named limiter ('global' default).
 *
 * Fix: the original async handler let a limiter/Redis failure reject the
 * promise, which Express 4 does not catch — the request would hang and the
 * process would log an unhandled rejection. We now fail open: when the
 * limiter is unreachable the request proceeds, because rate limiting exists
 * to protect capacity and must not become a single point of failure.
 */
export function rateLimit(limiterKey: keyof typeof limiters = 'global') {
  return async (req: Request, res: Response, next: NextFunction) => {
    try {
      const identifier = getRateLimitIdentifier(req, limiterKey);
      const result = await limiters[limiterKey].check(identifier);
      applyRateLimitHeaders(res, result);
      if (!result.allowed) {
        return res.status(429).json({
          error: 'Too Many Requests',
          retryAfter: result.retryAfter,
          message: `Rate limit exceeded. Try again in ${result.retryAfter} seconds.`,
        });
      }
    } catch (err) {
      // Fail open on limiter errors rather than blocking all traffic.
      console.error('rate limiter unavailable, failing open', err);
    }
    next();
  };
}
// Usage in routes
// router.post('/auth/login', rateLimit('auth'), loginHandler);
// router.get('/reports/generate', rateLimit('expensive'), reportHandler);
// app.use(rateLimit('global')); // Applied to all routes
Token Bucket for Burst Handling
Our customers need to burst: loading 1,000 products at signup, then dropping to low sustained traffic. A token bucket fits this pattern better than a sliding window.
// Token bucket: refills at constant rate, allows bursts up to bucket capacity
// Atomic Lua script. State per key is a hash { tokens, last_refill (ms) };
// a missing key is treated as a full bucket (tokens = capacity).
//   KEYS[1] = the per-identifier bucket key
//   ARGV    = [ capacity, refill rate (tokens/sec), tokens requested, now (ms) ]
// Returns { allowed (1/0), whole tokens remaining, seconds until enough tokens }.
// EXPIRE is set to time-to-refill-full plus 60s slack so idle buckets
// self-clean without ever expiring mid-use.
const TOKEN_BUCKET_SCRIPT = `
local key = KEYS[1]
local capacity = tonumber(ARGV[1])
local refill_rate = tonumber(ARGV[2]) -- tokens per second
local requested = tonumber(ARGV[3])
local now = tonumber(ARGV[4])
-- Get current bucket state
local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
local tokens = tonumber(bucket[1]) or capacity
local last_refill = tonumber(bucket[2]) or now
-- Calculate token refill since last check
local elapsed = (now - last_refill) / 1000 -- convert ms to seconds
local new_tokens = math.min(capacity, tokens + elapsed * refill_rate)
if new_tokens < requested then
-- Not enough tokens
local wait_time = (requested - new_tokens) / refill_rate
redis.call('HMSET', key, 'tokens', new_tokens, 'last_refill', now)
redis.call('EXPIRE', key, math.ceil(capacity / refill_rate) + 60)
return {0, math.floor(new_tokens), math.ceil(wait_time)}
end
-- Consume tokens
local remaining = new_tokens - requested
redis.call('HMSET', key, 'tokens', remaining, 'last_refill', now)
redis.call('EXPIRE', key, math.ceil(capacity / refill_rate) + 60)
return {1, math.floor(remaining), 0}
`;
/**
 * Distributed token-bucket limiter: a burst of up to `capacity` requests may
 * be served at once, after which sustained throughput is capped at
 * `refillRate` tokens/second. Refill-and-consume runs atomically in a Lua
 * script on the Redis server, so it is safe across multiple app servers.
 */
export class TokenBucketRateLimiter {
  constructor(
    private redis: Redis,
    private capacity: number, // max burst size
    private refillRate: number, // tokens per second
  ) {}

  /** Try to take `tokens` (default 1) from the bucket for `identifier`. */
  async consume(identifier: string, tokens = 1): Promise<RateLimitResult> {
    const bucketKey = `tokenbucket:${identifier}`;
    const nowMs = Date.now();

    const reply = await this.redis.eval(
      TOKEN_BUCKET_SCRIPT,
      1,
      bucketKey,
      String(this.capacity),
      String(this.refillRate),
      String(tokens),
      String(nowMs),
    ) as [number, number, number];
    const [allowed, remaining, waitSeconds] = reply;

    const denied = !allowed;
    return {
      allowed: !denied,
      limit: this.capacity,
      remaining,
      // When denied, this is when enough tokens will have refilled.
      resetAt: new Date(nowMs + waitSeconds * 1000),
      retryAfter: denied ? waitSeconds : undefined,
    };
  }
}
Tiered Rate Limits by Plan
Free users: 100 requests/min
Pro users: 1000 requests/min
Enterprise: 10000 requests/min, plus per-endpoint overrides
// Per-plan quotas. Typed Record<string, ...> (rather than a union key)
// because the plan name arrives from req.user at runtime; unknown plans fall
// back to the free-tier limit in tieredRateLimit.
const PLAN_LIMITS: Record<string, { requestsPerMinute: number }> = {
  free: { requestsPerMinute: 100 },
  pro: { requestsPerMinute: 1000 },
  enterprise: { requestsPerMinute: 10000 },
};
/**
 * Express middleware applying per-plan rate limits (free/pro/enterprise).
 *
 * Fixes over the original:
 * - A thrown limiter/Redis error in the async handler was never caught, so
 *   Express 4 left the request hanging with an unhandled rejection; we now
 *   fail open instead.
 * - The 429 body now includes retryAfter, consistent with rateLimit().
 *
 * Limiter instances are cached per distinct limit value, so the Map holds at
 * most one entry per plan tier.
 */
export function tieredRateLimit() {
  const limiters = new Map<number, SlidingWindowRateLimiter>();
  return async (req: Request, res: Response, next: NextFunction) => {
    try {
      const plan = req.user?.plan ?? 'free';
      const limit = PLAN_LIMITS[plan]?.requestsPerMinute ?? 100;
      // Lazily create one shared limiter per distinct limit value.
      if (!limiters.has(limit)) {
        limiters.set(limit, new SlidingWindowRateLimiter(redis, limit, 60_000));
      }
      const identifier = `plan:${req.user?.id ?? req.ip}`;
      const result = await limiters.get(limit)!.check(identifier);
      applyRateLimitHeaders(res, result);
      if (!result.allowed) {
        return res.status(429).json({
          error: 'Rate limit exceeded',
          plan,
          limit,
          retryAfter: result.retryAfter,
          upgrade_url: plan !== 'enterprise' ? '/pricing' : undefined,
        });
      }
    } catch (err) {
      // Fail open: a broken limiter must not take the whole API down.
      console.error('tiered rate limiter unavailable, failing open', err);
    }
    next();
  };
}
For the Redis infrastructure these patterns rely on, including Redis clusters and high availability, see the Redis guide. For applying rate limiting at the API gateway level before requests reach your services, the API gateway guide covers gateway-level rate limiting. The Claude Skills 360 bundle includes backend infrastructure skill sets covering rate limiting, circuit breaking, and resilience patterns. Start with the free tier to try rate limiting implementation.