Outlines generates guaranteed structured output from local LLMs at the token level. pip install outlines. import outlines. Load model: model = outlines.models.transformers("meta-llama/Llama-3.2-3B-Instruct", device="cuda"). JSON generation with Pydantic: from pydantic import BaseModel; class User(BaseModel): name: str; age: int. generator = outlines.generate.json(model, User), user = generator("Extract: John Doe, age 30") — returns validated User instance. JSON Schema: generator = outlines.generate.json(model, {"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}}}). Regex: generator = outlines.generate.regex(model, r"\d{3}-\d{2}-\d{4}"), ssn = generator("My SSN is: "). Choice: generator = outlines.generate.choice(model, ["positive","neutral","negative"]), label = generator("Sentiment of: great product!"). Text: generator = outlines.generate.text(model), text = generator("Write a haiku:"). Type: generator = outlines.generate.format(model, int), n = generator("How many planets?"). Batch: users = generator(["Extract info from Alice, 25", "Extract info from Bob, 30"]). Sampling: generator = outlines.generate.json(model, User, sampler=outlines.samplers.greedy()) or multinomial(samples=3, temperature=0.7). vLLM backend: model = outlines.models.vllm("meta-llama/Llama-3.2-3B-Instruct"). llama.cpp: model = outlines.models.llamacpp("model.gguf"). Outlines works by constructing a finite state machine from the schema and applying it as a token-level mask during generation — no post-processing, no retries, guaranteed valid output every time. Claude Code generates Outlines schemas, constrained decoders, extraction pipelines, and classification generators for local LLM inference.
CLAUDE.md for Outlines
## Outlines Stack
- Version: outlines >= 0.1.0
- Load: outlines.models.transformers(hf_model_id) | vllm(model_id) | llamacpp(gguf_path)
- JSON: outlines.generate.json(model, PydanticModel | json_schema_dict)
- Regex: outlines.generate.regex(model, r"pattern") — guaranteed match
- Choice: outlines.generate.choice(model, ["opt1","opt2"]) — always one of the options
- Format: outlines.generate.format(model, int | float | bool) — typed scalars
- Sampler: greedy() | multinomial(samples=N, temperature=T)
- Guarantee: token-level FSM mask — no post-processing or retries needed
Outlines Structured Generation
# generation/outlines_generate.py — constrained local LLM generation
from __future__ import annotations
import re
from enum import Enum
from typing import Optional
import outlines
from pydantic import BaseModel, Field
# ── 1. Load model (shared across all generators) ─────────────────────────────
def get_model(
    model_id: str = "microsoft/Phi-3.5-mini-instruct",
    device: str = "cuda",
    dtype: str = "float16",
):
    """Load a HuggingFace model for constrained generation.

    Args:
        model_id: HuggingFace Hub id of the model to load.
        device: device string forwarded to outlines ("cuda", "cpu", ...).
        dtype: torch dtype name forwarded via ``model_kwargs["torch_dtype"]``.

    Returns:
        An outlines transformers model, shared by every generator below.
    """
    hf_kwargs = {"torch_dtype": dtype}
    return outlines.models.transformers(model_id, device=device, model_kwargs=hf_kwargs)
# ── 2. JSON generation with Pydantic schemas ──────────────────────────────────
class ContactInfo(BaseModel):
    """Contact details extracted from free text.

    Every field except ``name`` is optional so the constrained generator
    can emit ``null`` when a detail is absent from the source text.
    """

    name: str  # person's name (required)
    email: Optional[str] = None  # e-mail address, if mentioned
    phone: Optional[str] = None  # phone number, if mentioned
    company: Optional[str] = None  # employer / organization, if mentioned
class Invoice(BaseModel):
    """Structured invoice fields extracted from OCR text."""

    vendor: str  # issuing vendor name
    amount: float = Field(ge=0)  # total amount; must be non-negative
    currency: str = Field(pattern=r"[A-Z]{3}")  # ISO currency code, e.g. "USD"
    date: str = Field(pattern=r"\d{4}-\d{2}-\d{2}")  # ISO date, YYYY-MM-DD
    description: str  # free-text description of the invoice
class MedicalEntity(BaseModel):
    """Named entity from clinical text.

    NOTE(review): ``start``/``end`` look like offsets into the source text,
    but nothing here enforces that — confirm against callers before relying
    on them for slicing.
    """

    text: str  # surface form of the entity as generated
    # One of MEDICATION|CONDITION|PROCEDURE|DOSAGE — conveyed to the model
    # via the field description, not enforced by the schema itself.
    entity_type: str = Field(description="MEDICATION|CONDITION|PROCEDURE|DOSAGE")
    start: int  # presumably start offset in the input text — TODO confirm
    end: int  # presumably end offset in the input text — TODO confirm
    confidence: float = Field(ge=0.0, le=1.0)  # model-reported score in [0, 1]
def extract_contact(model, text: str) -> ContactInfo:
    """Pull contact details out of *text* as a validated ContactInfo.

    The JSON generator constrains decoding to the ContactInfo schema, so
    the returned object is always valid — no post-processing required.
    """
    contact_gen = outlines.generate.json(model, ContactInfo)
    return contact_gen(
        f"Extract contact information from this text:\n{text}\n\nContact:"
    )
def extract_invoice(model, ocr_text: str) -> Invoice:
    """Parse OCR'd invoice text into a validated Invoice instance."""
    prompt = f"Extract invoice details:\n{ocr_text}\n\nInvoice:"
    invoice_gen = outlines.generate.json(model, Invoice)
    return invoice_gen(prompt)
def extract_entities_batch(
    model,
    texts: list[str],
) -> list[list[MedicalEntity]]:
    """Extract medical entities from several texts in one batched call.

    Returns one list of MedicalEntity per input text, in input order.
    """
    batch_prompts = []
    for doc in texts:
        batch_prompts.append(f"Extract medical entities:\n{doc}\n\nEntities:")
    entities_gen = outlines.generate.json(model, list[MedicalEntity])
    # Passing a list of prompts triggers batched inference.
    return entities_gen(batch_prompts)
# ── 3. Regex-constrained generation ──────────────────────────────────────────
def extract_phone_number(model, text: str) -> str:
    """Extract a US-format phone number; output always matches the pattern."""
    us_phone_pattern = r"\+?1?\s*\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}"
    phone_gen = outlines.generate.regex(model, us_phone_pattern)
    prompt = f"Extract the phone number from: {text}\nPhone:"
    return phone_gen(prompt)
def extract_date(model, text: str) -> str:
    """Extract a date; decoding is constrained to the YYYY-MM-DD shape."""
    iso_date = r"\d{4}-\d{2}-\d{2}"
    return outlines.generate.regex(model, iso_date)(
        f"What is the date mentioned in: {text}\nDate:"
    )
def extract_price(model, text: str) -> str:
    """Extract a dollar price; output always has the form $123.45."""
    prompt = f"Extract the price from: {text}\nPrice:"
    price_gen = outlines.generate.regex(model, r"\$\d{1,6}\.\d{2}")
    return price_gen(prompt)
# ── 4. Classification with choice ────────────────────────────────────────────
class SentimentLabel(str, Enum):
    """Closed set of sentiment labels.

    Subclasses ``str`` so members compare equal to their plain string
    values (e.g. ``SentimentLabel.POSITIVE == "positive"``).
    """

    POSITIVE = "positive"
    NEUTRAL = "neutral"
    NEGATIVE = "negative"
def classify_sentiment(model, text: str) -> str:
    """Classify sentiment — always returns one of the SentimentLabel values.

    Args:
        model: an outlines model (see ``get_model``).
        text: the text to classify.

    Returns:
        One of "positive", "neutral", "negative" — guaranteed by the
        choice constraint.
    """
    # Derive the choice set from SentimentLabel instead of repeating the
    # string literals, so the generator cannot drift out of sync with the
    # enum. The resulting list is identical: ["positive", "neutral", "negative"].
    generator = outlines.generate.choice(
        model, [label.value for label in SentimentLabel]
    )
    return generator(f"Sentiment of the following text:\n\"{text}\"\n\nSentiment:")
def classify_priority(model, text: str) -> str:
    """Classify a support ticket's priority; result is always a valid level."""
    priority_levels = ["critical", "high", "medium", "low"]
    choose_priority = outlines.generate.choice(model, priority_levels)
    prompt = f"Priority of this support ticket:\n\"{text}\"\n\nPriority:"
    return choose_priority(prompt)
def batch_classify(
    model,
    texts: list[str],
    labels: list[str],
) -> list[str]:
    """Classify many texts at once; every result is drawn from *labels*.

    Returns one label per input text, in input order.
    """
    choose = outlines.generate.choice(model, labels)
    # A list of prompts triggers batched inference; each output is
    # constrained to the provided label set.
    return choose([f"Classify: \"{t}\"\nLabel:" for t in texts])
# ── 5. Typed scalar extraction ────────────────────────────────────────────────
def extract_count(model, text: str) -> int:
    """Extract a count from *text*; the type constraint guarantees an int."""
    int_gen = outlines.generate.format(model, int)
    question = f"How many items are mentioned in: \"{text}\"\nCount:"
    return int_gen(question)
def extract_score(model, text: str) -> float:
    """Extract a quality score as a float.

    The type constraint guarantees a parseable float; the 0.0-1.0 range is
    requested by the prompt but not enforced by the constraint itself.
    """
    score_gen = outlines.generate.format(model, float)
    return score_gen(
        f"On a scale of 0.0 to 1.0, rate the quality of: \"{text}\"\nScore:"
    )
# ── 6. Complex nested schema ──────────────────────────────────────────────────
class ProductReview(BaseModel):
    """Structured analysis of a free-text product review."""

    product_name: str  # name of the reviewed product
    rating: int = Field(ge=1, le=5)  # star rating, 1-5 inclusive
    pros: list[str] = Field(min_length=1, max_length=5)  # at least one pro
    cons: list[str] = Field(min_length=0, max_length=5)  # cons may be empty
    verdict: str = Field(pattern="(buy|skip|wait)")  # purchase recommendation
    summary: str = Field(max_length=200)  # short free-text summary
def analyze_review(model, review_text: str) -> ProductReview:
    """Turn a free-text product review into a validated ProductReview."""
    review_gen = outlines.generate.json(model, ProductReview)
    return review_gen(f"Analyze this product review:\n{review_text}\n\nAnalysis:")
# ── 7. Custom grammar (EBNF) ──────────────────────────────────────────────────
# Lark-style EBNF grammar for infix arithmetic: +, -, *, / with the usual
# precedence (term binds tighter than expr), parentheses for grouping,
# integer/decimal number literals, and whitespace ignored.
ARITHMETIC_GRAMMAR = r"""
?start: expr
?expr: term (("+" | "-") term)*
?term: factor (("*" | "/") factor)*
?factor: NUMBER | "(" expr ")"
NUMBER: /\d+(\.\d+)?/
%ignore /\s+/
"""
def generate_arithmetic_expression(model, description: str) -> str:
    """Generate an arithmetic expression constrained by ARITHMETIC_GRAMMAR."""
    prompt = f"Write an arithmetic expression for: {description}\nExpression:"
    cfg_gen = outlines.generate.cfg(model, ARITHMETIC_GRAMMAR)
    return cfg_gen(prompt)
# ── Demo ──────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test one example of each constrained-generation mode.
    print("Loading model...")
    model = get_model("microsoft/Phi-3.5-mini-instruct")

    # JSON extraction (Pydantic-validated)
    contact = extract_contact(
        model, "Call Sarah at [email protected] or +1-555-867-5309"
    )
    print(f"Contact: {contact.name}, {contact.email}, {contact.phone}")

    # Choice-constrained classification
    label = classify_sentiment(model, "This product exceeded all my expectations!")
    print(f"Sentiment: {label}")
    priority = classify_priority(model, "Production is down, all users affected!")
    print(f"Priority: {priority}")

    # Regex-constrained extraction
    phone = extract_phone_number(model, "My number is 415-555-1234, please call anytime.")
    print(f"Phone: {phone}")

    # Typed scalar extraction
    count = extract_count(model, "I bought three apples, two oranges, and a banana.")
    print(f"Count: {count}")

    # Nested JSON schema
    review = analyze_review(
        model,
        "The MacBook Pro is incredibly fast and has great battery life, "
        "though it's expensive and ports are limited. Overall a premium laptop.",
    )
    print(f"Review: rating={review.rating}/5, verdict={review.verdict}")
Consider Instructor as an alternative when using hosted LLM APIs such as Anthropic or OpenAI, where token-level control is unavailable: Instructor relies on Pydantic validation plus retry loops (2-3 extra API calls on failure), whereas Outlines applies FSM-based constraints directly during token sampling, making it impossible for a local model to produce invalid output — no retries needed. Consider LMQL or guidance when you need interleaved code execution within generation, or multi-part constrained programs that branch on generated values: LMQL provides a full programming language for generation control, while Outlines focuses specifically on output type constraints (JSON/regex/choice) with a simpler API that covers the vast majority of structured extraction use cases. The Claude Skills 360 bundle includes Outlines skill sets covering JSON Pydantic generation, regex constraints, choice classification, typed scalars, batch inference, and custom EBNF grammars. Start with the free tier to try constrained LLM code generation.