The Anthropic SDK gives you direct access to Claude’s most powerful features: tool use (function calling), streaming, multi-turn conversations, and Computer Use. Claude Code writes the orchestration logic — tool schemas, retry/fallback patterns, token tracking, and the agentic loops that make Claude autonomous. These patterns apply whether you’re building a customer support bot, a code analysis tool, or a full autonomous agent.
CLAUDE.md for Anthropic SDK Projects
## Anthropic SDK Stack
- SDK: anthropic Python SDK (latest) or @anthropic-ai/sdk (TypeScript)
- Model: claude-sonnet-4-6 for production, claude-haiku-4-5 for high-volume/speed
- Tool use: always define strict JSON schemas; validate inputs before calling
- Streaming: use stream() for UX-visible responses; non-stream for batch processing
- Token budget: track input+output per request; set max_tokens conservatively
- Retries: exponential backoff with jitter on 529 (overloaded) and connection errors; never retry 4xx client errors
- System prompts: versioned in files, not inline strings
- No prompt injection: sanitize user content before interpolating into prompts
Tool Use (Function Calling)
# ai/tools.py — define tools with strict schemas
import anthropic
import json
from typing import Any

# Shared module-level client, reused by every request in this module.
client = anthropic.Anthropic()
# Tool definitions — Claude decides when and how to call these.
# Passed as `tools=TOOLS` to client.messages.create(); each `input_schema`
# is JSON Schema describing the arguments Claude may supply.
TOOLS = [
    {
        "name": "search_orders",
        "description": "Search customer orders by status, date range, or customer ID. Use when the customer asks about their orders.",
        "input_schema": {
            "type": "object",
            "properties": {
                "customer_id": {
                    "type": "string",
                    "description": "Customer ID to filter by (optional)"
                },
                "status": {
                    "type": "string",
                    "enum": ["pending", "processing", "shipped", "delivered", "cancelled"],
                    "description": "Order status to filter by (optional)"
                },
                "limit": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 20,
                    "default": 5,
                    "description": "Maximum number of orders to return"
                }
            },
            # No required fields — every filter is optional.
            "required": []
        }
    },
    {
        "name": "get_order_details",
        "description": "Get detailed information about a specific order including items, tracking, and timeline.",
        "input_schema": {
            "type": "object",
            "properties": {
                "order_id": {
                    "type": "string",
                    "description": "The order ID (format: ORD-XXXXXX)"
                }
            },
            "required": ["order_id"]
        }
    },
    {
        # Destructive action: the description restricts when Claude should use
        # it, and the enum allows only "cancelled" so no other transition can
        # be requested through this tool.
        "name": "update_order_status",
        "description": "Update an order status. Only use when the customer explicitly requests a cancellation or modification.",
        "input_schema": {
            "type": "object",
            "properties": {
                "order_id": {"type": "string"},
                "new_status": {
                    "type": "string",
                    "enum": ["cancelled"]
                },
                "reason": {"type": "string", "description": "Reason for the update"}
            },
            "required": ["order_id", "new_status", "reason"]
        }
    }
]
def execute_tool(name: str, inputs: dict) -> Any:
    """Dispatch a tool call from Claude to its backing implementation.

    Args:
        name: Tool name as declared in TOOLS.
        inputs: Arguments Claude supplied for the call.

    Returns:
        Whatever the underlying db handler returns.

    Raises:
        ValueError: If ``name`` does not match any known tool.
    """
    # Table-driven dispatch; lambdas defer all db access until invocation.
    handlers = {
        "search_orders": lambda args: db.search_orders(**args),
        "get_order_details": lambda args: db.get_order(args["order_id"]),
        "update_order_status": lambda args: db.update_order_status(
            args["order_id"], args["new_status"], args["reason"]
        ),
    }
    if name not in handlers:
        raise ValueError(f"Unknown tool: {name}")
    return handlers[name](inputs)
Agentic Loop
# ai/agent.py — agentic loop that runs tools until Claude returns text
def run_agent(
    user_message: str,
    conversation_history: list[dict],
    max_turns: int = 10,
) -> tuple[str, list[dict]]:
    """Run the agent loop: send message, execute any tool calls, continue until done.

    Args:
        user_message: New user turn to append to the conversation.
        conversation_history: Prior messages; not mutated — a fresh list is built.
        max_turns: Safety cap on model round-trips so an unexpected stop_reason
            or an endless tool-calling cycle cannot spin forever.

    Returns:
        (final_text_response, updated_history)

    Raises:
        RuntimeError: If the agent does not finish within ``max_turns`` turns.
    """
    messages = conversation_history + [{"role": "user", "content": user_message}]
    for _ in range(max_turns):
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages,
        )
        # Append Claude's response to history
        messages.append({"role": "assistant", "content": response.content})
        if response.stop_reason == "tool_use":
            # Execute tool calls, append results, and loop for another turn.
            tool_results = []
            for block in response.content:
                if block.type != "tool_use":
                    continue
                try:
                    result = execute_tool(block.name, block.input)
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": json.dumps(result),
                    })
                except Exception as e:
                    # Report the failure back to Claude instead of crashing;
                    # it can apologize or retry with different arguments.
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": f"Error: {str(e)}",
                        "is_error": True,
                    })
            messages.append({"role": "user", "content": tool_results})
            continue
        # end_turn — or any other terminal stop_reason such as max_tokens,
        # which previously re-entered the loop and corrupted the conversation.
        # Joining all text blocks also avoids the StopIteration that next()
        # raised when the response contained no text block at all.
        text = "".join(b.text for b in response.content if b.type == "text")
        return text, messages
    raise RuntimeError(f"Agent did not complete within {max_turns} turns")
Streaming Responses
# ai/streaming.py — stream tokens as they arrive for responsive UX
from anthropic import Anthropic
import sys
def stream_response(user_message: str) -> str:
    """Stream Claude's response, printing tokens as they arrive.

    Args:
        user_message: Single user turn to send.

    Returns:
        The full response text, concatenated from the streamed deltas.
    """
    client = Anthropic()
    chunks: list[str] = []
    with client.messages.stream(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": user_message}],
    ) as stream:
        for text in stream.text_stream:
            # Flush each delta immediately so the user sees tokens in real time.
            print(text, end="", flush=True)
            chunks.append(text)
    print()  # Final newline after the streamed output
    # Single join instead of repeated `+=` — avoids quadratic string rebuilds
    # on long responses.
    return "".join(chunks)
# FastAPI streaming endpoint
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

# Application instance the route decorators below attach to.
app = FastAPI()
@app.post("/api/chat/stream")
async def chat_stream(request: ChatRequest):
    """SSE endpoint that relays Claude's token stream to the client.

    Args:
        request: Chat payload whose ``messages`` list is forwarded verbatim.

    Returns:
        StreamingResponse emitting one ``data: {"text": ...}`` event per delta,
        terminated by ``data: [DONE]``.
    """
    async def generate():
        # Use the async client: a sync `client.messages.stream` iterated with a
        # plain `for` inside this async generator would block the event loop
        # for the entire stream, stalling every other request on the worker.
        async_client = anthropic.AsyncAnthropic()
        async with async_client.messages.stream(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=SYSTEM_PROMPT,
            messages=request.messages,
        ) as stream:
            async for text in stream.text_stream:
                # SSE format: one JSON payload per event
                yield f"data: {json.dumps({'text': text})}\n\n"
        yield "data: [DONE]\n\n"
    return StreamingResponse(generate(), media_type="text/event-stream")
Token Tracking and Cost Management
# ai/usage_tracker.py
from dataclasses import dataclass, field
from anthropic.types import Message
# Pricing per million tokens (as of 2026), keyed by model ID.
# Values are USD: {"input": $/Mtok of input, "output": $/Mtok of output}.
# NOTE(review): verify these rates against the current Anthropic pricing
# page before using cost_usd for anything billing-critical.
PRICING = {
    "claude-sonnet-4-6": {"input": 3.0, "output": 15.0},
    "claude-haiku-4-5": {"input": 0.25, "output": 1.25},
    "claude-opus-4-6": {"input": 15.0, "output": 75.0},
}
@dataclass
class UsageAccumulator:
    """Running token-usage totals for one model, convertible to a dollar cost."""

    model: str            # model ID used for PRICING lookup
    input_tokens: int = 0
    output_tokens: int = 0
    requests: int = 0

    def add(self, response: Message):
        """Fold one API response's usage counters into the running totals."""
        usage = response.usage
        self.input_tokens += usage.input_tokens
        self.output_tokens += usage.output_tokens
        self.requests += 1

    @property
    def cost_usd(self) -> float:
        """Total cost in USD at this model's per-million-token rates.

        Unknown models fall back to claude-sonnet-4-6 pricing.
        """
        rates = PRICING.get(self.model, PRICING["claude-sonnet-4-6"])
        input_cost = self.input_tokens / 1_000_000 * rates["input"]
        output_cost = self.output_tokens / 1_000_000 * rates["output"]
        return input_cost + output_cost

    def summary(self) -> dict:
        """Snapshot of counters plus derived totals, ready for logging."""
        total = self.input_tokens + self.output_tokens
        return {
            "model": self.model,
            "requests": self.requests,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": total,
            "cost_usd": round(self.cost_usd, 4),
        }
Batch Processing
# ai/batch.py — process many items concurrently with rate limiting
import asyncio
from anthropic import AsyncAnthropic

# Shared async client, reused across all batch calls in this module.
async_client = AsyncAnthropic()
async def classify_batch(items: list[str], concurrency: int = 10) -> list[dict]:
    """Classify many items concurrently, respecting rate limits.

    Args:
        items: Raw customer messages to classify.
        concurrency: Maximum number of in-flight API requests.

    Returns:
        One dict per input item, in input order, shaped like
        {"category": str, "confidence": float}. Falls back to
        {"category": "other", "confidence": 0.0} when the model reply
        cannot be parsed.
    """
    semaphore = asyncio.Semaphore(concurrency)

    async def classify_one(item: str) -> dict:
        # Hold the semaphore only for the API call itself so the slot is
        # released before local parsing work.
        async with semaphore:
            response = await async_client.messages.create(
                model="claude-haiku-4-5",  # Faster/cheaper for classification
                max_tokens=100,
                messages=[{
                    "role": "user",
                    "content": f"Classify this customer message into one category (order_status/refund/product_question/complaint/other). Message: {item}\n\nRespond with JSON: {{\"category\": \"...\", \"confidence\": 0.0-1.0}}"
                }],
            )
        try:
            # content may be empty or its first block may carry no .text;
            # treat that the same as unparseable JSON instead of crashing
            # the whole gather() on one bad response.
            return json.loads(response.content[0].text)
        except (json.JSONDecodeError, IndexError, AttributeError):
            return {"category": "other", "confidence": 0.0}

    return await asyncio.gather(*[classify_one(item) for item in items])
Retry with Exponential Backoff
# ai/retry.py
import time
import random
from anthropic import APIStatusError, APIConnectionError
def with_retry(fn, max_retries: int = 3, base_delay: float = 1.0):
    """Call ``fn``, retrying transient Anthropic API errors with backoff.

    Retries 429 (rate limited) and 529 (overloaded) responses and connection
    errors with exponential backoff plus jitter. All other status codes
    (400/401/403/404, etc.) are raised immediately — they will not succeed
    on retry.

    Args:
        fn: Zero-argument callable performing the API request.
        max_retries: Total number of attempts; must be >= 1.
        base_delay: Backoff base in seconds; attempt k waits ~base * 2**k.

    Returns:
        Whatever ``fn`` returns on the first successful attempt.

    Raises:
        ValueError: If ``max_retries`` < 1.
        APIStatusError: On non-retryable statuses, or when retries run out.
        APIConnectionError: When connection retries run out.
    """
    if max_retries < 1:
        # Guard against the loop body never running and silently returning None.
        raise ValueError("max_retries must be at least 1")
    for attempt in range(max_retries):
        last_attempt = attempt == max_retries - 1
        try:
            return fn()
        except APIStatusError as e:
            # 429 is as transient as 529; both deserve backoff + retry.
            if e.status_code in (429, 529) and not last_attempt:
                time.sleep(base_delay * (2 ** attempt) + random.uniform(0, 1))
            else:
                raise
        except APIConnectionError:
            if last_attempt:
                raise
            time.sleep(base_delay * (2 ** attempt))
For the LLM evaluation framework that tests Claude-powered applications built with this SDK, see the LLM evals guide. For the RAG patterns that provide Claude with retrieval context, the RAG guide covers vector search integration. The Claude Skills 360 bundle includes Anthropic SDK skill sets covering tool use orchestration, streaming UX patterns, and production agent loops. Start with the free tier to try agent pattern generation.