Cerberus validates Python dicts against schema rules. Install: pip install cerberus. Schema: schema = {"name": {"type":"string","required":True,"minlength":1,"maxlength":100}}. Validate: v = Validator(schema); v.validate({"name":"Alice"}) → True/False. Errors: v.errors → {"name": ["min length is 1"]}. Required field: {"required": True}. Nullable: {"nullable": True} — allow None. Allowed values: {"allowed": ["admin","user","mod"]}. Forbidden: {"forbidden": [0, ""]}. Type: "type": "string" or ["string","integer"] for multiple. Integer range: {"type":"integer","min":0,"max":130}. Float: {"type":"float","min":0.0}. List: {"type":"list","items":[{"type":"string"}]} — one rule set per position in a fixed-length list; {"type":"list","schema":{"type":"integer","min":1}} — the same rules for every element. Dict nested: {"type":"dict","schema":{"street":{"type":"string"}}}. Regex: {"type":"string","regex":"^[A-Z]{2}$"}. Coerce: {"type":"integer","coerce":int} — auto-cast input. {"coerce": lambda v: v.strip().lower()}. Depends: {"dependencies":"field_b"} — require field_b when this field is present. {"dependencies":{"status":["paid","shipped"]}}. Oneof: {"oneof":[{"type":"string"},{"type":"integer"}]}. Custom validator: class MyValidator(Validator): def _validate_is_email(self, constraint, field, value): if constraint and "@" not in str(value): self._error(field,"must be email"). Schema: {"type":"string","is_email":True}. allow_unknown: v = Validator(schema, allow_unknown=True) — accept unknown fields without validating them. allow_unknown={"type":"string"} — validate unknown fields as strings. Normalize: v.normalized(doc) — apply coercions and defaults without running the validation rules. Error handler: v.errors is a dict; list(v.errors.items()). Document rules: {"keysrules":{"type":"string","regex":"^[a-z]+$"},"valuesrules":{"type":"integer"}} — validates all keys and values of a dict field (these rules were named keyschema/valueschema before Cerberus 1.3). Purge: v = Validator(schema, purge_unknown=True) — strip unknown fields from the result. v.document — cleaned doc after validate(). Claude Code generates Cerberus schemas, coerce pipelines, and custom validators.
CLAUDE.md for Cerberus
## Cerberus Stack
- Version: cerberus >= 1.3 | pip install cerberus
- Schema: dict of field dicts: {"field": {"type":"string","required":True,"minlength":1}}
- Validate: v = Validator(schema); ok = v.validate(doc); errors = v.errors
- Coerce: {"coerce": int} | {"coerce": str.lower} — runs before type check
- Nested: {"type":"dict","schema":{...}} | {"type":"list","schema":{...}}
- Unknown: Validator(schema, allow_unknown=True) | purge_unknown=True to strip
- Custom: class V(Validator): def _validate_rule_name(self, constraint, field, value)
Cerberus Validation Pipeline
# app/validators.py — Cerberus schema definitions and validator setup
from __future__ import annotations

import re
from datetime import datetime, timezone
from typing import Any, Optional

from cerberus import Validator
# ─────────────────────────────────────────────────────────────────────────────
# Custom Validator — business rule extensions
# ─────────────────────────────────────────────────────────────────────────────
# Patterns used by the custom rules below, compiled once at import time.
_EMAIL_PATTERN = re.compile(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$")
_SKU_PATTERN = re.compile(r"^[A-Z0-9]+-\d{4,}$")


class AppValidator(Validator):
    """Cerberus ``Validator`` extended with domain-specific rules.

    Adds four schema rules: ``is_email``, ``is_sku``, ``is_past_date`` and
    ``positive_total``. Each rule takes a boolean constraint and only inspects
    values of the Python type it understands; values of any other type are
    skipped here so the schema's ``type`` rule can report them.
    """

    # NOTE: Cerberus reads the text after the sentence "The rule's arguments
    # are validated against this schema:" in each ``_validate_*`` docstring as
    # a Python literal describing the rule's allowed constraint. Keep that
    # sentence verbatim and put nothing after the literal.

    def _validate_is_email(self, constraint: bool, field: str, value: Any) -> None:
        """Report an error unless a string value looks like an email address.

        The whole value must match the pattern, so a trailing newline or other
        surrounding text is rejected.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not (constraint and isinstance(value, str)):
            return
        # fullmatch: with re.match, ``$`` would also accept "a@b.com\n".
        if _EMAIL_PATTERN.fullmatch(value) is None:
            self._error(field, "must be a valid email address")

    def _validate_is_sku(self, constraint: bool, field: str, value: Any) -> None:
        """Report an error unless a string value has the SKU shape.

        A SKU is one or more uppercase letters/digits, a hyphen, then at least
        four digits (e.g. PROD-1001).

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not (constraint and isinstance(value, str)):
            return
        # fullmatch: with re.match, ``$`` would also accept "PROD-1001\n".
        if _SKU_PATTERN.fullmatch(value) is None:
            self._error(field, "must match SKU format PROD-NNNN")

    def _validate_is_past_date(self, constraint: bool, field: str, value: Any) -> None:
        """Report an error when a datetime value lies in the future.

        Naive datetimes are interpreted as UTC. Timezone-aware datetimes are
        compared against an aware "now", which avoids the TypeError raised
        when naive and aware datetimes are compared.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not (constraint and isinstance(value, datetime)):
            return
        now = datetime.now(timezone.utc)
        is_aware = value.tzinfo is not None and value.utcoffset() is not None
        if not is_aware:
            # Same instant as the deprecated datetime.utcnow(): naive UTC.
            now = now.replace(tzinfo=None)
        if value > now:
            self._error(field, "must not be in the future")

    def _validate_positive_total(self, constraint: bool, field: str, value: Any) -> None:
        """Report an error unless a numeric value is strictly positive.

        NaN is rejected as well: it is neither positive nor comparable.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not (constraint and isinstance(value, (int, float))):
            return
        # ``not value > 0`` (rather than ``value <= 0``) also catches NaN,
        # for which every ordering comparison is False.
        if not value > 0:
            self._error(field, "must be a positive number")
# ─────────────────────────────────────────────────────────────────────────────
# Address schema
# ─────────────────────────────────────────────────────────────────────────────
# Rule set for a postal address; one entry per accepted field.
ADDRESS_SCHEMA = {
    "street": {"type": "string", "required": True, "minlength": 1, "maxlength": 200},
    "city": {"type": "string", "required": True, "minlength": 1, "maxlength": 100},
    # Two uppercase letters; the coercer upper-cases the input first.
    "state": {"type": "string", "required": True, "regex": r"^[A-Z]{2}$", "coerce": str.upper},
    # Five digits with an optional "-NNNN" suffix.
    "postal_code": {"type": "string", "required": True, "regex": r"^\d{5}(-\d{4})?$"},
    # Optional two-character code, upper-cased; "US" when omitted.
    "country": {
        "type": "string", "default": "US",
        "minlength": 2, "maxlength": 2, "coerce": str.upper,
    },
}
# ─────────────────────────────────────────────────────────────────────────────
# User schemas
# ─────────────────────────────────────────────────────────────────────────────
# Length bounds shared by the two required name fields of the creation payload.
_CREATE_NAME_BOUNDS = {"minlength": 1, "maxlength": 100}

# Rule set for the user-creation payload.
CREATE_USER_SCHEMA = {
    # Lower-cased by the coercer; format checked by the custom is_email rule.
    "email": {
        "type": "string", "required": True, "is_email": True,
        "coerce": str.lower, "maxlength": 254,
    },
    "first_name": {"type": "string", "required": True, **_CREATE_NAME_BOUNDS},
    "last_name": {"type": "string", "required": True, **_CREATE_NAME_BOUNDS},
    "password": {"type": "string", "required": True, "minlength": 8, "maxlength": 72},
    # Optional; "user" when omitted.
    "role": {"type": "string", "default": "user", "allowed": ["user", "moderator", "admin"]},
    # Optional and nullable; cast with int() when provided.
    "age": {
        "type": "integer", "nullable": True, "default": None,
        "min": 0, "max": 130, "coerce": int,
    },
    # Optional nested address, checked against ADDRESS_SCHEMA when present.
    "address": {"type": "dict", "nullable": True, "default": None, "schema": ADDRESS_SCHEMA},
}
# Rules shared by the optional name fields of a user update.
_UPDATE_NAME_RULES = {"type": "string", "required": False, "minlength": 1, "maxlength": 100}

# Rule set for a user update: every field is optional and email/password
# are deliberately not part of it.
UPDATE_USER_SCHEMA = {
    # Each field gets its own copy so the two rule dicts stay independent.
    "first_name": {**_UPDATE_NAME_RULES},
    "last_name": {**_UPDATE_NAME_RULES},
    "role": {"type": "string", "required": False, "allowed": ["user", "moderator", "admin"]},
}
# ─────────────────────────────────────────────────────────────────────────────
# Product schema
# ─────────────────────────────────────────────────────────────────────────────
# String spellings recognised by _coerce_bool (compared case-insensitively).
_TRUE_STRINGS = frozenset({"1", "true", "t", "yes", "y", "on"})
_FALSE_STRINGS = frozenset({"", "0", "false", "f", "no", "n", "off"})


def _coerce_bool(value: Any) -> bool:
    """Convert *value* to a bool, understanding common string spellings.

    Plain ``bool()`` treats every non-empty string as true, so "false" or "0"
    arriving from a form or query string would silently become ``True``.

    Args:
        value: The raw input. Strings are stripped and compared
            case-insensitively against the recognised spellings; any other
            type falls back to ``bool(value)``.

    Returns:
        The boolean the input stands for.

    Raises:
        ValueError: If a string is not a recognised spelling. The demo in
            this file expects a failing coercer to surface as a field error.
    """
    if isinstance(value, str):
        token = value.strip().lower()
        if token in _TRUE_STRINGS:
            return True
        if token in _FALSE_STRINGS:
            return False
        raise ValueError(f"cannot interpret {value!r} as a boolean")
    return bool(value)


# Rule set for a product payload.
PRODUCT_SCHEMA = {
    "sku": {
        "type": "string",
        "required": True,
        "is_sku": True,  # custom rule defined on AppValidator
    },
    "name": {
        "type": "string",
        "required": True,
        "minlength": 1,
        "maxlength": 200,
    },
    "price": {
        "type": "float",
        "required": True,
        "positive_total": True,  # custom rule defined on AppValidator
        "max": 100_000.0,
        "coerce": float,
    },
    "stock": {
        "type": "integer",
        "required": True,
        "min": 0,
        "coerce": int,
    },
    "category": {
        "type": "string",
        "required": True,
        "allowed": ["Electronics", "Clothing", "Books", "Home", "Sports"],
    },
    "is_active": {
        "type": "boolean",
        "default": True,
        # Not plain ``bool``: that would turn the string "false" into True.
        "coerce": _coerce_bool,
    },
    "tags": {
        "type": "list",
        "default": [],
        "schema": {"type": "string", "minlength": 1},
    },
    "metadata": {
        "type": "dict",
        "nullable": True,
        "default": None,
        "allow_unknown": True,  # arbitrary key-value metadata
    },
}
# ─────────────────────────────────────────────────────────────────────────────
# Order schema — with list of nested order-line dicts
# ─────────────────────────────────────────────────────────────────────────────
# Rule set for a single order line (one product at one quantity and price).
ORDER_LINE_SCHEMA = {
    "product_id": {
        "type": "string",
        "required": True,
        "minlength": 1,
    },
    "sku": {
        "type": "string",
        "required": True,
        "is_sku": True,  # custom rule defined on AppValidator
    },
    "quantity": {
        "type": "integer",
        "required": True,
        "min": 1,
        "max": 10_000,
        "coerce": int,
    },
    "unit_price": {
        "type": "float",
        "required": True,
        "positive_total": True,  # custom rule defined on AppValidator
        "coerce": float,
    },
}
# Rule set for the order-creation payload.
CREATE_ORDER_SCHEMA = {
    "user_id": {"type": "string", "required": True, "minlength": 1},
    # At least one line; every element is a dict checked against ORDER_LINE_SCHEMA.
    "lines": {
        "type": "list", "required": True, "minlength": 1,
        "schema": {"type": "dict", "schema": ORDER_LINE_SCHEMA},
    },
    # Optional free-text note, at most 500 characters.
    "notes": {"type": "string", "nullable": True, "default": None, "maxlength": 500},
    # Optional nested address, checked against ADDRESS_SCHEMA when present.
    "shipping_address": {
        "type": "dict", "nullable": True, "default": None,
        "schema": ADDRESS_SCHEMA,
    },
}
# ─────────────────────────────────────────────────────────────────────────────
# Validator instances — create once, reuse
# ─────────────────────────────────────────────────────────────────────────────
# Module-level validator instances, one per schema, built when the module is
# imported.
# NOTE(review): the validate_* helpers and sanitize_user_input in this file
# construct their own AppValidator on every call and never reference these
# instances — confirm whether anything outside this module uses them.
_create_user_validator = AppValidator(CREATE_USER_SCHEMA)
# require_all=False — presumably so that omitted fields are not treated as
# required on update; verify against the Cerberus Validator documentation.
_update_user_validator = AppValidator(UPDATE_USER_SCHEMA, require_all=False)
_product_validator = AppValidator(PRODUCT_SCHEMA)
_create_order_validator = AppValidator(CREATE_ORDER_SCHEMA)
# ─────────────────────────────────────────────────────────────────────────────
# Validation helpers
# ─────────────────────────────────────────────────────────────────────────────
def validate_create_user(data: dict) -> tuple[Optional[dict], dict]:
    """Check a user-creation payload against CREATE_USER_SCHEMA.

    Args:
        data: The raw payload to validate.

    Returns:
        ``(document, {})`` when the payload is valid, where ``document`` is
        the validator's processed copy of the input (coercions applied);
        ``(None, errors)`` otherwise, with ``errors`` taken from the
        validator's ``errors`` attribute.
    """
    checker = AppValidator(CREATE_USER_SCHEMA)
    is_valid = checker.validate(data)
    if not is_valid:
        return None, checker.errors
    return checker.document, {}
def validate_product(data: dict) -> tuple[Optional[dict], dict]:
    """Check a product payload against PRODUCT_SCHEMA.

    Args:
        data: The raw payload to validate.

    Returns:
        ``(document, {})`` when the payload is valid, where ``document`` is
        the validator's processed copy of the input; ``(None, errors)``
        otherwise.
    """
    checker = AppValidator(PRODUCT_SCHEMA)
    is_valid = checker.validate(data)
    if not is_valid:
        return None, checker.errors
    return checker.document, {}
def validate_order(data: dict) -> tuple[Optional[dict], dict]:
    """Check an order-creation payload against CREATE_ORDER_SCHEMA.

    Args:
        data: The raw payload to validate.

    Returns:
        ``(document, {})`` when the payload is valid, where ``document`` is
        the validator's processed copy of the input; ``(None, errors)``
        otherwise.
    """
    checker = AppValidator(CREATE_ORDER_SCHEMA)
    is_valid = checker.validate(data)
    if not is_valid:
        return None, checker.errors
    return checker.document, {}
def validate_partial_update(data: dict, schema: dict) -> tuple[Optional[dict], dict]:
    """Validate only the fields that are present in *data* (PATCH semantics).

    The schema is narrowed to the keys that occur in *data* before the
    validator is built, so rules for absent fields are never consulted.

    Args:
        data: The partial payload to validate.
        schema: The full rule set to draw the per-field rules from.

    Returns:
        ``(document, {})`` when the payload is valid, where ``document`` is
        the validator's processed copy of the input; ``(None, errors)``
        otherwise.
    """
    rules_for_present_fields = {
        field: rules for field, rules in schema.items() if field in data
    }
    checker = AppValidator(rules_for_present_fields)
    is_valid = checker.validate(data)
    if not is_valid:
        return None, checker.errors
    return checker.document, {}
# ─────────────────────────────────────────────────────────────────────────────
# purge_unknown — strip unexpected fields from input
# ─────────────────────────────────────────────────────────────────────────────
def sanitize_user_input(data: dict) -> dict:
    """Return *data* reduced to the fields CREATE_USER_SCHEMA knows about.

    A validator configured with ``purge_unknown=True`` processes the input
    and its resulting document is returned. Useful when proxying data to a
    downstream service.

    NOTE: the outcome of ``validate()`` is not inspected, so the returned
    document may still hold values that failed their rules. Use
    validate_create_user when the caller needs to know whether the payload
    is valid.

    Args:
        data: The raw payload, possibly carrying unexpected keys.

    Returns:
        The validator's document after processing, with unknown keys removed.
    """
    purger = AppValidator(CREATE_USER_SCHEMA, purge_unknown=True)
    purger.validate(data)  # result deliberately unused; only the document matters here
    sanitized = purger.document
    return sanitized
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
def main() -> None:
    """Run a small demonstration of the validation helpers.

    Prints the outcome of validating a valid user, an invalid product and an
    order with two lines, then shows unknown keys being purged.
    """
    # Valid user creation
    new_user = {
        "email": "Alice@Example.COM",  # coerce → lower
        "first_name": "Alice",
        "last_name": "Smith",
        "password": "SecurePass1",
        "role": "admin",
        "age": "30",  # coerce → int
        "address": {
            "street": "123 Main St",
            "city": "Springfield",
            "state": "il",  # coerce → upper IL
            "postal_code": "62701",
        },
    }
    doc, errors = validate_create_user(new_user)
    # The helpers signal failure with None, so test for that explicitly.
    if doc is not None:
        print("Valid user:")
        print(f"  email: {doc['email']}")  # alice@example.com
        print(f"  age:   {doc['age']} (int)")
        print(f"  state: {doc['address']['state']}")  # IL
    else:
        print(f"Errors: {errors}")

    # Invalid product
    bad_product = {
        "sku": "invalid-sku",  # must match PROD-NNNN
        "name": "Widget",
        "price": -5.0,  # must be positive
        "stock": "ten",  # can't coerce to int
        "category": "Toys",  # not in allowed list
    }
    doc, errors = validate_product(bad_product)
    print("\nProduct errors:")
    for field, msgs in errors.items():
        print(f"  {field}: {msgs}")

    # Order with lines
    raw_order = {
        "user_id": "usr-001",
        "lines": [
            {"product_id": "p1", "sku": "PROD-1001", "quantity": "2", "unit_price": "19.99"},
            {"product_id": "p2", "sku": "BOOK-2005", "quantity": "1", "unit_price": "39.99"},
        ],
    }
    doc, errors = validate_order(raw_order)
    if doc is not None:
        print("\nOrder valid:")
        print(f"  lines: {len(doc['lines'])}")
        print(f"  line[0] qty: {doc['lines'][0]['quantity']} (int)")
    else:
        # Previously a failing order printed nothing at all.
        print(f"\nOrder errors: {errors}")

    # purge unknown fields
    messy = {**new_user, "injected_field": "DROP TABLE users", "__proto__": "x"}
    clean = sanitize_user_input(messy)
    print(f"\nPurged unknown keys: 'injected_field' in clean = {'injected_field' in clean}")
    print(f"  clean keys: {sorted(clean.keys())}")


if __name__ == "__main__":
    main()
For the marshmallow alternative — marshmallow’s Schema class is the better choice when you need to both serialize (dump) and deserialize (load) — serializing ORM models to JSON for responses and validating JSON bodies on the way in — while Cerberus only validates dicts and returns the coerced document without a serialization direction, making Cerberus ideal for lightweight request body checks in Flask routes or MongoDB document validation where no separate response schema is needed. For the jsonschema alternative — jsonschema.validate(data, schema) validates against a JSON Schema draft (standard, tooling-agnostic), but the schema syntax is verbose and it has no built-in coerce step, while Cerberus’s {"coerce": int} + {"type": "integer", "min": 0} casts and validates in one call, its _validate_* extension API adds business rules with a docstring-schema contract, and v.document holds the fully normalized dict ready for database insertion. The Claude Skills 360 bundle includes Cerberus skill sets covering schema dict syntax, type/required/nullable/allowed constraints, minlength/maxlength/min/max ranges, regex patterns, coerce for automatic type conversion, nested dict and list schemas, keysrules/valuesrules (named keyschema/valueschema before Cerberus 1.3) for map validation, dependencies for conditional requirements, allow_unknown and purge_unknown for untrusted input, custom _validate_* methods, and normalized document extraction. Start with the free tier to try dict validation code generation.