Faker generates realistic fake data for testing and development. pip install faker. from faker import Faker. fake = Faker(). Name: fake.name(), fake.first_name(), fake.last_name(). Email: fake.email(), fake.safe_email(). Phone: fake.phone_number(). Address: fake.address(), fake.city(), fake.state(), fake.country(), fake.postcode(). Internet: fake.url(), fake.ipv4(), fake.ipv6(), fake.user_agent(), fake.domain_name(). IDs: fake.uuid4(), fake.md5(), fake.sha256(). Text: fake.text(), fake.sentence(), fake.paragraph(), fake.words(5). Dates: fake.date_of_birth(minimum_age=18, maximum_age=80), fake.date_this_decade(), fake.date_time_between("-5y", "now"). Numbers: fake.random_int(1, 1000), fake.pyfloat(left_digits=3, right_digits=2, positive=True). Boolean: fake.boolean(chance_of_getting_true=70). Credit card: fake.credit_card_number(), fake.credit_card_expire(), fake.credit_card_provider(). Company: fake.company(), fake.job(), fake.catch_phrase(). Color: fake.color_name(), fake.hex_color(). Locale: Faker("de_DE"), Faker(["en_US","fr_FR"]) — multi-locale. Seed: Faker.seed(42) — reproducible data. Custom provider: subclass BaseProvider and register it with fake.add_provider(). Profile: fake.simple_profile(), fake.profile(fields=["name","mail","address"]). Unique: fake.unique.email() — no repeats. Claude Code generates Faker test fixtures, database seed scripts, and localized demo data.
CLAUDE.md for Faker
## Faker Stack
- Version: faker >= 25.0
- Import: from faker import Faker; fake = Faker()
- Locale: Faker("de_DE") | Faker(["en_US", "es_MX"]) for multi-locale
- Seed: Faker.seed(42) (class-level) | fake.seed_instance(42) (instance)
- Unique: fake.unique.email() — raises UniquenessException after exhaustion
- Custom: subclass BaseProvider and fake.add_provider(MyProvider)
- Profile: fake.simple_profile() for a quick person dict
Faker Test Data Pipeline
# tests/faker_pipeline.py — realistic test data generation with Faker
from __future__ import annotations
import datetime
import decimal
import random
import uuid
from typing import Any
from faker import Faker
from faker.providers import BaseProvider
# Reproducible seed for deterministic tests.
# NOTE(review): this seeds Faker's generators; calls to the stdlib `random`
# module elsewhere in this file are presumably not covered by it — confirm.
Faker.seed(42)
# Module-level generator shared by the factories below that do not take a
# locale argument.
fake = Faker(["en_US"]) # or Faker(["en_US", "de_DE", "ja_JP"])
# ── 0. Custom providers ───────────────────────────────────────────────────────
class EcommerceProvider(BaseProvider):
    """Custom Faker provider for e-commerce domain data.

    Adds product, order and payment helpers to any ``Faker`` instance the
    provider is registered on.
    """

    CATEGORIES = ["Electronics", "Clothing", "Home & Garden", "Sports",
                  "Books", "Toys", "Health", "Food & Beverages"]
    STATUSES = ["pending", "processing", "shipped", "delivered", "cancelled"]
    PAYMENT_METHODS = ["credit_card", "paypal", "stripe", "bank_transfer", "crypto"]
    UNITS = ["piece", "kg", "liter", "pack", "box"]

    def product_category(self) -> str:
        """Return one entry of ``CATEGORIES`` picked at random."""
        return self.random_element(self.CATEGORIES)

    def order_status(self) -> str:
        """Return one entry of ``STATUSES`` picked at random."""
        return self.random_element(self.STATUSES)

    def payment_method(self) -> str:
        """Return one entry of ``PAYMENT_METHODS`` picked at random."""
        return self.random_element(self.PAYMENT_METHODS)

    def product_sku(self) -> str:
        """Return a SKU shaped like ``PROD-1234-5678`` (prefix varies)."""
        prefix = self.random_element(["PROD", "SKU", "ITEM"])
        return f"{prefix}-{self.numerify('####-####')}"

    def price(self, min_price: float = 0.99, max_price: float = 9999.99) -> str:
        """Return a price between the two bounds as a two-decimal string.

        Args:
            min_price: Lower bound of the price range.
            max_price: Upper bound of the price range.

        Returns:
            The price formatted with exactly two decimals, e.g. ``"19.99"``.
        """
        # Draw from the generator's own RNG instead of the global ``random``
        # module so Faker.seed() / seed_instance() also control prices.
        amount = self.generator.random.uniform(min_price, max_price)
        return f"{amount:.2f}"


fake.add_provider(EcommerceProvider)
# ── 1. User data factories ─────────────────────────────────────────────────────
def make_user(
    locale: str = "en_US",
    confirmed: bool = True,
) -> dict[str, Any]:
    """Generate a realistic user record.

    Args:
        locale: Faker locale code used to build the generator for this record.
        confirmed: Value stored under ``"is_confirmed"``.

    Returns:
        A dict with string ids, name/contact fields, ``date_of_birth`` and
        ``created_at`` as ISO-8601 strings, and ``age`` in completed years.

    NOTE(review): a fresh ``Faker`` is built on every call, so ``unique``
    presumably only de-duplicates within that single instance and not across
    calls — confirm whether cross-call email uniqueness is required.
    """
    f = Faker(locale)
    dob = f.date_of_birth(minimum_age=18, maximum_age=80)

    # Completed years: subtract one when this year's birthday is still ahead.
    # (days // 365 drifts with leap days and is wrong around birthdays.)
    today = datetime.date.today()
    birthday_pending = (today.month, today.day) < (dob.month, dob.day)
    age = today.year - dob.year - int(birthday_pending)

    return {
        "id": str(f.uuid4()),
        "username": f.user_name(),
        "email": f.unique.safe_email(),
        "first_name": f.first_name(),
        "last_name": f.last_name(),
        "phone": f.phone_number(),
        "date_of_birth": dob.isoformat(),
        "age": age,
        # Picked through Faker so the value follows the Faker seed rather
        # than the unseeded global ``random`` module.
        "gender": f.random_element(
            ["male", "female", "non_binary", "prefer_not_to_say"]
        ),
        "locale": locale,
        "is_active": True,
        "is_confirmed": confirmed,
        "created_at": f.date_time_between("-3y", "now").isoformat(),
    }
def _address_region(f: Faker) -> str | None:
    """Return a first-level region name from ``f``, or None if unsupported."""
    # Locale providers name this concept differently; try each in turn
    # instead of assuming that a locale without state() has province().
    for method_name in ("state", "province", "prefecture", "administrative_unit"):
        method = getattr(f, method_name, None)
        if method is not None:
            return method()
    return None


def make_address(locale: str = "en_US") -> dict:
    """Generate a postal address for a given locale.

    Args:
        locale: Faker locale code used to build the generator.

    Returns:
        A dict with ``id``, ``street``, ``city``, ``state``, ``postcode``,
        ``country`` (a country code) and ``is_default``. ``state`` is None
        when the locale exposes none of the known region methods.
    """
    f = Faker(locale)
    return {
        "id": str(f.uuid4()),
        "street": f.street_address(),
        "city": f.city(),
        "state": _address_region(f),
        "postcode": f.postcode(),
        "country": f.current_country_code(),
        "is_default": True,
    }
def make_users_batch(
    n: int = 100,
    locales: list[str] | None = None,
    seed: int | None = None,
) -> list[dict]:
    """Generate a batch of user records with optional locale mixing.

    Args:
        n: Number of user records to create.
        locales: Locale codes to pick from for each record; ``None`` or an
            empty list means ``["en_US"]``.
        seed: When given, seeds both Faker and the stdlib ``random`` module
            before generating so the batch is reproducible.

    Returns:
        A list of ``n`` dicts as produced by ``make_user``.
    """
    if seed is not None:
        Faker.seed(seed)
        # The locale pick below goes through the stdlib ``random`` module,
        # which Faker.seed() does not touch, so seed it as well.
        random.seed(seed)
    locale_pool = locales or ["en_US"]
    return [make_user(locale=random.choice(locale_pool)) for _ in range(n)]
# ── 2. E-commerce data factories ──────────────────────────────────────────────
def make_product() -> dict:
    """Generate a product listing.

    All random draws go through the module-level ``fake`` generator so the
    whole record follows the Faker seed.

    Returns:
        A dict with ``id``, ``sku``, ``name`` (at most 80 characters),
        ``description``, ``category``, ``price`` (two-decimal string),
        ``stock``, ``weight_kg``, ``is_active``, ``created_at`` (ISO-8601
        string) and ``tags`` (1 to 5 distinct words).
    """
    return {
        "id": str(fake.uuid4()),
        "sku": fake.product_sku(),
        "name": fake.catch_phrase().title()[:80],
        "description": fake.paragraph(nb_sentences=3),
        "category": fake.product_category(),
        "price": fake.price(min_price=1.99, max_price=299.99),
        "stock": fake.random_int(0, 500),
        # 0.10 kg .. 20.00 kg in 0.01 steps, drawn from Faker's RNG rather
        # than the unseeded global ``random`` module.
        "weight_kg": fake.random_int(10, 2000) / 100,
        "is_active": fake.boolean(chance_of_getting_true=85),
        "created_at": fake.date_time_between("-2y", "now").isoformat(),
        "tags": fake.words(nb=fake.random_int(1, 5), unique=True),
    }
def make_order(
    user_id: str | None = None,
    n_items: int | None = None,
) -> dict:
    """Generate a realistic order with line items.

    Args:
        user_id: Owner of the order; a random UUID string is used when falsy.
        n_items: Number of line items; a random count from 1 to 6 is used
            when ``None``. An explicit ``0`` yields an order with no items.

    Returns:
        A dict describing the order. Monetary fields (``subtotal``, ``tax``,
        ``shipping``, ``total`` and the per-item prices) are decimal strings;
        ``created_at`` / ``updated_at`` are ISO-8601 strings with
        ``updated_at`` never earlier than ``created_at``.
    """
    user_id = user_id or str(fake.uuid4())
    if n_items is None:
        n_items = fake.random_int(1, 6)

    items = []
    subtotal = decimal.Decimal("0.00")
    for _ in range(n_items):
        unit_price = decimal.Decimal(fake.price(min_price=0.99, max_price=199.99))
        qty = fake.random_int(1, 5)
        line_total = unit_price * qty
        items.append({
            "product_id": str(fake.uuid4()),
            "sku": fake.product_sku(),
            "name": fake.catch_phrase().title()[:60],
            "qty": qty,
            "unit_price": str(unit_price),
            "line_total": str(line_total),
        })
        subtotal += line_total

    # Flat 8% tax rounded to cents; shipping is waived from a subtotal of 50.
    tax = (subtotal * decimal.Decimal("0.08")).quantize(decimal.Decimal("0.01"))
    shipping = decimal.Decimal("9.99") if subtotal < 50 else decimal.Decimal("0.00")

    # Keep updated_at inside the last 30 days but never before created_at;
    # drawing the two independently could put the update before the creation.
    created_at = fake.date_time_between("-1y", "now")
    earliest_update = max(
        created_at,
        datetime.datetime.now() - datetime.timedelta(days=30),
    )
    updated_at = fake.date_time_between(earliest_update, "now")

    return {
        "id": str(fake.uuid4()),
        "order_number": f"ORD-{fake.numerify('########')}",
        "user_id": user_id,
        "status": fake.order_status(),
        "items": items,
        "subtotal": str(subtotal),
        "tax": str(tax),
        "shipping": str(shipping),
        "total": str(subtotal + tax + shipping),
        "payment_method": fake.payment_method(),
        "shipping_address": make_address(),
        "created_at": created_at.isoformat(),
        "updated_at": updated_at.isoformat(),
    }
# ── 3. API / network data ─────────────────────────────────────────────────────
def make_api_event(
    event_type: str | None = None,
) -> dict:
    """Generate a web API access log event.

    Args:
        event_type: Accepted for interface compatibility; the generated
            record does not currently use it.

    Returns:
        A dict with a UTC ``timestamp`` ending in ``"Z"``, ``request_id``,
        ``method``, ``path``, ``status_code``, ``duration_ms``,
        ``user_agent``, ``ip_address`` and ``user_id`` (``None`` for
        anonymous requests).

    Method, path, status code and duration are drawn from the stdlib
    ``random`` module; the remaining fields come from ``fake``.
    """
    methods = ["GET", "POST", "PUT", "PATCH", "DELETE"]
    paths = ["/api/users", "/api/products", "/api/orders", "/health", "/metrics"]
    statuses = [200, 200, 200, 201, 204, 400, 401, 403, 404, 429, 500]
    status_weights = [10, 10, 10, 3, 2, 2, 2, 1, 3, 1, 1]

    # Generate the instant in UTC so the trailing "Z" is truthful; a naive
    # local-time value with "Z" appended would mislabel the timezone.
    moment = fake.date_time_between("-7d", "now", tzinfo=datetime.timezone.utc)
    timestamp = moment.isoformat().replace("+00:00", "Z")

    return {
        "timestamp": timestamp,
        "request_id": str(fake.uuid4()),
        "method": random.choice(methods),
        "path": random.choice(paths),
        "status_code": random.choices(statuses, weights=status_weights)[0],
        "duration_ms": round(random.lognormvariate(4.0, 0.8), 1),
        "user_agent": fake.user_agent(),
        "ip_address": fake.ipv4_public(),
        "user_id": str(fake.uuid4()) if fake.boolean(70) else None,
    }
# ── 4. Financial test data ────────────────────────────────────────────────────
def make_transaction() -> dict:
    """Generate a financial transaction for testing payment processing.

    Returns:
        A dict with ``id``, ``amount`` (two-decimal string), ``currency``,
        card fields (``card_number``, ``card_expiry``, ``card_provider``),
        ``iban``, ``bic``, ``description``, ``reference`` and ``created_at``
        (ISO-8601 string).
    """
    # Use one card type for both number and provider so the pair is
    # consistent; an independently drawn provider could name another brand.
    card_type = "visa"
    return {
        "id": str(fake.uuid4()),
        "amount": fake.price(min_price=0.01, max_price=50000.00),
        "currency": fake.currency_code(),
        "card_number": fake.credit_card_number(card_type=card_type),
        "card_expiry": fake.credit_card_expire(),
        "card_provider": fake.credit_card_provider(card_type=card_type),
        "iban": fake.iban(),
        "bic": fake.swift(),
        "description": fake.sentence(nb_words=6),
        "reference": fake.bothify("TXN-????-####"),
        "created_at": fake.date_time_this_month().isoformat(),
    }
# ── 5. Localized data batches ─────────────────────────────────────────────────
def make_localized_dataset(
    locales: dict[str, int],  # {"en_US": 70, "de_DE": 20, "ja_JP": 10}
    seed: int = 0,
) -> list[dict]:
    """Generate a shuffled mixed-locale user dataset.

    Use for testing locale-specific validation and formatting.

    Args:
        locales: Maps a locale code to the number of users to create for it.
            The values are absolute counts, not percentages.
        seed: Seeds both Faker and the stdlib ``random`` module so the
            generated records and their shuffled order are repeatable.

    Returns:
        A list of user dicts as produced by ``make_user``, in shuffled order.
    """
    Faker.seed(seed)
    # The shuffle below uses the stdlib ``random`` module, which Faker.seed()
    # does not touch, so seed it as well.
    random.seed(seed)

    users: list[dict] = []
    for locale, count in locales.items():
        users.extend(make_user(locale=locale) for _ in range(count))
    random.shuffle(users)
    return users
# ── 6. Database seed helper ───────────────────────────────────────────────────
def seed_database(session, n_users: int = 50, n_products: int = 200):
    """
    Template for seeding a SQLAlchemy session with fake data.

    The loop bodies are placeholders: as written, ``session`` is never
    touched and nothing is committed. Swap the example comments for real
    ``session.add(...)`` calls using your own UserModel / ProductModel.

    Args:
        session: Session the example comments would add rows to.
        n_users: Number of user records to generate.
        n_products: Number of product iterations to run.
    """
    Faker.seed(0)

    print(f"Seeding {n_users} users...")
    generated_users = make_users_batch(n_users)
    for record in generated_users:
        # Example: session.add(UserModel(**record))
        pass

    print(f"Seeding {n_products} products...")
    for _product_index in range(n_products):
        # Example: session.add(ProductModel(**make_product()))
        pass

    # session.commit()
    print("Seed complete.")
# ── Demo ──────────────────────────────────────────────────────────────────────
def _run_demo() -> None:
    """Print one sample from each factory to stdout."""
    print("Faker Test Data Demo")
    print("=" * 50)

    # Single user
    Faker.seed(42)
    user = make_user("en_US")
    print(f"\nUser: {user['first_name']} {user['last_name']}")
    print(f" email: {user['email']}")
    print(f" dob: {user['date_of_birth']} (age {user['age']})")

    # Product
    product = make_product()
    print(f"\nProduct: {product['name']}")
    print(f" SKU: {product['sku']}, Price: ${product['price']}, Stock: {product['stock']}")

    # Order placed by the user generated above
    order = make_order(user_id=user["id"])
    print(f"\nOrder: {order['order_number']} ({order['status']})")
    print(f" Items: {len(order['items'])}, Total: ${order['total']}")
    print(f" Ship to: {order['shipping_address']['city']}, {order['shipping_address']['country']}")

    # Batch mixing three locales
    batch = make_users_batch(5, locales=["en_US", "de_DE", "ja_JP"], seed=99)
    print(f"\nMixed-locale batch (5 users):")
    for u in batch:
        print(f" [{u['locale']}] {u['first_name']} {u['last_name']} — {u['email']}")

    # API logs
    events = [make_api_event() for _ in range(3)]
    print(f"\nAPI events:")
    for e in events:
        print(f" {e['method']:6} {e['path']:<20} {e['status_code']} ({e['duration_ms']}ms)")


if __name__ == "__main__":
    _run_demo()
Compared with the random + string.ascii_letters alternative: random.choice(string.ascii_letters) generates structurally invalid data (email “abcd”, phone “xyzq”) that passes field-presence checks but breaks real validators, whereas Faker’s fake.email() generates RFC 5321-valid strings and fake.phone_number() follows locale-specific phone number formats, producing test data that exercises the same validation code paths as production. Compared with the hand-crafted fixtures alternative: manually maintained test_users.json fixtures go stale when the schema changes, while make_users_batch(100, seed=42) regenerates fresh conformant data on every test run; fake.unique.email() guarantees no duplicate emails from a single Faker instance (raising UniquenessException cleanly if the pool is exhausted); and Faker("de_DE") generates German postal codes and German phone formats for locale-aware validation tests without a separate DE fixture file. The Claude Skills 360 bundle includes Faker skill sets covering user/address factories, custom BaseProvider subclasses, make_order with line items and totals, make_api_event for log testing, financial transaction data, mixed-locale batch generation, Faker.seed for reproducibility, fake.unique for deduplication, and database seed helpers. Start with the free tier to try test data code generation.