OpenAI’s platform offers more than chat completions: the Assistants API manages stateful threads with file retrieval and code execution. The Batch API processes millions of requests at half the cost, results available within 24 hours. Fine-tuning adapts GPT-4o-mini to domain-specific tasks with a fraction of prompt engineering overhead. Structured Outputs with response_format: { type: "json_schema" } guarantees valid JSON matching your schema. The Realtime API handles bidirectional audio for voice applications. Claude Code generates Assistants API integrations, batch job scripts, fine-tuning dataset preparation, and the structured output schemas for production OpenAI deployments.
CLAUDE.md for OpenAI Projects
## OpenAI Stack
- SDK: openai >= 1.50 (Python) or openai >= 4.60 (Node.js)
- Models: gpt-4o (smart), gpt-4o-mini (fast/cheap), o3 (reasoning)
- Assistants: for stateful multi-turn with file retrieval
- Batch: for offline bulk tasks — ~50% cost reduction
- Fine-tuning: gpt-4o-mini for domain adaptation (not basic prompting)
- Embeddings: text-embedding-3-large (3072d), text-embedding-3-small (1536d, cheaper)
- Structured outputs: response_format with json_schema — validate client-side too
- Rate limits: implement exponential backoff, retrying on openai.RateLimitError and openai.APITimeoutError
Assistants API with File Search
# assistants/knowledge_assistant.py
from openai import OpenAI
import time
client = OpenAI()
def create_knowledge_assistant(name: str, instructions: str) -> str:
    """Provision an assistant with file_search and code_interpreter tools.

    An empty vector store named "<name>-knowledge-base" is created and
    attached for file_search; documents are uploaded to it separately
    (see upload_documents).

    Returns the new assistant's id.
    """
    tool_list = [
        {"type": "file_search"},
        {"type": "code_interpreter"},
    ]
    # Attaching a vector_stores entry here makes the API create the store
    # and wire it to the assistant in one call.
    resources = {
        "file_search": {
            "vector_stores": [
                {"name": f"{name}-knowledge-base"},
            ]
        }
    }
    assistant = client.beta.assistants.create(
        name=name,
        instructions=instructions,
        model="gpt-4o",
        tools=tool_list,
        tool_resources=resources,
        temperature=0.1,  # low temperature for consistent, factual answers
        response_format={"type": "text"},
    )
    return assistant.id
def upload_documents(vector_store_id: str, file_paths: list[str]) -> list[str]:
    """Upload local files and attach them to a file_search vector store.

    Each path is uploaded with purpose="assistants", then the whole set is
    attached in a single batch; the service chunks and embeds the files
    automatically. Returns the uploaded file ids.
    """
    uploaded: list[str] = []
    for path in file_paths:
        with open(path, "rb") as fh:
            uploaded.append(client.files.create(file=fh, purpose="assistants").id)
    # create_and_poll blocks until every file has been chunked + embedded
    batch = client.beta.vector_stores.file_batches.create_and_poll(
        vector_store_id=vector_store_id,
        file_ids=uploaded,
    )
    print(f"Files processed: {batch.file_counts.completed}/{batch.file_counts.total}")
    return uploaded
def chat_with_assistant(assistant_id: str, user_message: str, thread_id: str | None = None) -> tuple[str, str]:
    """Send a message and get a response. Returns (response_text, thread_id).

    Pass the returned thread_id back in to continue the same conversation;
    omit it to start a fresh thread.
    """
    if thread_id:
        thread = client.beta.threads.retrieve(thread_id)
    else:
        thread = client.beta.threads.create()

    # Append the user's turn, then run the assistant to completion.
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=user_message,
    )
    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id,
        assistant_id=assistant_id,
        max_completion_tokens=2048,
    )
    if run.status != "completed":
        raise RuntimeError(f"Run failed with status: {run.status}")

    # Newest message first; after a completed run that is the assistant reply.
    messages = client.beta.threads.messages.list(
        thread_id=thread.id,
        limit=1,
        order="desc",
    )
    reply = messages.data[0].content[0]
    # Surface file_search citations so sources are visible in the logs.
    for annotation in reply.text.annotations:
        if annotation.type == "file_citation":
            print(f"Citation from file: {annotation.file_citation.file_id}")
    return reply.text.value, thread.id
# Usage
# Create the assistant once, then reuse its id across conversations.
assistant_id = create_knowledge_assistant(
    name="Support Assistant",
    instructions="Answer customer questions using the knowledge base. Cite sources. Be concise.",
)
# First turn starts a new thread (no thread_id passed).
response, thread_id = chat_with_assistant(assistant_id, "What is your return policy?")
print(response)
# Continue conversation in same thread
# Reusing thread_id keeps the prior exchange in the assistant's context.
followup, _ = chat_with_assistant(
    assistant_id,
    "What about electronics specifically?",
    thread_id=thread_id
)
Batch API for Bulk Inference
# batch/process_batch.py — cost-efficient bulk processing
import json
import time
from openai import OpenAI
from pathlib import Path
client = OpenAI()
def create_batch_file(requests: list[dict], output_path: str) -> str:
    """Serialize chat-completion request bodies into Batch API JSONL.

    Each line is an envelope {custom_id, method, url, body}; custom_id is
    "request-<index>" so results can be matched back to the input order.
    Returns output_path for convenient chaining.
    """
    envelopes = [
        json.dumps(
            {
                "custom_id": f"request-{idx}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": body,
            }
        )
        for idx, body in enumerate(requests)
    ]
    with open(output_path, "w") as out:
        out.writelines(line + "\n" for line in envelopes)
    return output_path
def submit_and_wait(input_file_path: str, description: str = "") -> list[dict]:
    """Submit a JSONL batch file and poll until the job finishes.

    Raises RuntimeError if the batch fails, expires, or is cancelled.
    Returns one dict per successful request: {id, content, model, tokens}.
    Lines with a per-request error are logged and skipped.
    """
    # Upload the JSONL payload with the dedicated "batch" purpose.
    with open(input_file_path, "rb") as fh:
        uploaded = client.files.create(file=fh, purpose="batch")

    job = client.batches.create(
        input_file_id=uploaded.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": description},
    )
    print(f"Batch {job.id} submitted. Waiting...")

    # Poll every 30s until a terminal status is reached.
    while True:
        job = client.batches.retrieve(job.id)
        print(f"Status: {job.status} | {job.request_counts.completed}/{job.request_counts.total}")
        if job.status == "completed":
            break
        if job.status in ("failed", "expired", "cancelled"):
            raise RuntimeError(f"Batch failed: {job.status}")
        time.sleep(30)

    # Output file is JSONL: one result envelope per submitted request.
    payload = client.files.content(job.output_file_id)
    parsed: list[dict] = []
    for raw in payload.text.strip().split("\n"):
        record = json.loads(raw)
        if record.get("error"):
            print(f"Failed: {record['custom_id']}: {record['error']}")
            continue
        body = record["response"]["body"]
        parsed.append({
            "id": record["custom_id"],
            "content": body["choices"][0]["message"]["content"],
            "model": body["model"],
            "tokens": body["usage"]["total_tokens"],
        })
    return parsed
# Usage: classify 50,000 customer reviews
def classify_reviews_batch(reviews: list[str]) -> list[str]:
    """Classify review sentiment in bulk via the Batch API.

    Builds one gpt-4o-mini chat request per review, submits them as a
    single batch, and returns the normalized (lowercased, stripped) labels.
    """
    system_prompt = "Classify sentiment as: positive, negative, or neutral. Reply with only the label."
    requests = []
    for review in reviews:
        requests.append({
            "model": "gpt-4o-mini",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": review},
            ],
            "max_tokens": 10,   # labels are one word; cap output tightly
            "temperature": 0,   # deterministic classification
        })
    input_file = create_batch_file(requests, "/tmp/review_batch.jsonl")
    results = submit_and_wait(input_file, "Review sentiment classification")
    return [item["content"].lower().strip() for item in results]
Structured Outputs
# structured/extraction.py — guaranteed JSON schema compliance
from openai import OpenAI
from pydantic import BaseModel
import json
client = OpenAI()
class OrderItem(BaseModel):
    """One order line item.

    Strict Structured Outputs reject a bare `dict` field — its generated
    JSON schema is an object with no `properties` — so line items need an
    explicit model.
    """

    product: str
    quantity: int
    price: float | None


class OrderExtraction(BaseModel):
    """Order details extracted from free text.

    Strict mode requires every property to be listed as required, so no
    field may carry a default; optionality is expressed as `... | None`
    (the model emits null for absent values).
    """

    customer_name: str | None
    items: list[OrderItem]
    total: float | None
    # Was `currency: str = "USD"` — a defaulted (non-required) field is
    # rejected by strict schema validation; null now means "not stated".
    currency: str | None
    delivery_address: str | None
    special_instructions: str | None
def extract_order_structured(text: str) -> OrderExtraction:
    """Extract order details with guaranteed JSON schema compliance.

    Uses the SDK's parse() helper, which converts the Pydantic model into
    a strict JSON schema and returns a validated instance.
    """
    system_msg = {
        "role": "system",
        "content": "Extract order information from the text. Only include explicitly mentioned information.",
    }
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[system_msg, {"role": "user", "content": text}],
        response_format=OrderExtraction,  # Pydantic model → JSON schema
    )
    # .parsed is guaranteed valid — the API enforces the schema server-side
    return completion.choices[0].message.parsed
# Alternative: raw JSON schema for Node.js or manual schema
def extract_with_raw_schema(text: str) -> dict:
    """Extract named entities using a hand-written strict JSON schema.

    Demonstrates the raw response_format payload (what the Node.js SDK or
    plain HTTP callers would send). Returns the parsed entity dict.
    """
    entity_schema = {
        "type": "object",
        "properties": {
            "people": {"type": "array", "items": {"type": "string"}},
            "organizations": {"type": "array", "items": {"type": "string"}},
            "dates": {"type": "array", "items": {"type": "string"}},
        },
        # strict mode demands every property be required and no extras
        "required": ["people", "organizations", "dates"],
        "additionalProperties": False,
    }
    response = client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[{"role": "user", "content": f"Extract entities from: {text}"}],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "entities",
                "strict": True,
                "schema": entity_schema,
            },
        },
    )
    return json.loads(response.choices[0].message.content)
Fine-Tuning Data Preparation
# finetuning/prepare_dataset.py — prepare training data for fine-tuning
import json
from openai import OpenAI
client = OpenAI()
def create_training_example(
    system: str,
    user: str,
    assistant: str,
) -> dict:
    """Build one chat-format fine-tuning example.

    Returns a {"messages": [...]} dict with the system, user, and
    assistant turns in the order the fine-tuning API expects.
    """
    turns = zip(("system", "user", "assistant"), (system, user, assistant))
    return {"messages": [{"role": role, "content": text} for role, text in turns]}
def save_training_data(examples: list[dict], output_path: str) -> None:
    """Write training examples to *output_path* as JSONL (one JSON object per line)."""
    serialized = [json.dumps(example) for example in examples]
    with open(output_path, "w") as out:
        out.writelines(line + "\n" for line in serialized)
    print(f"Saved {len(examples)} examples to {output_path}")
def launch_fine_tuning(training_file_path: str, model: str = "gpt-4o-mini-2024-07-18") -> str:
    """Upload a JSONL training file and start a fine-tuning job.

    Returns the fine-tuning job id (poll it with monitor_job).
    """
    with open(training_file_path, "rb") as fh:
        upload = client.files.create(file=fh, purpose="fine-tune")
    print(f"Uploaded training file: {upload.id}")

    # "auto" lets the service pick batch size / LR from dataset size.
    hyperparams = {
        "n_epochs": 3,
        "batch_size": "auto",
        "learning_rate_multiplier": "auto",
    }
    job = client.fine_tuning.jobs.create(
        training_file=upload.id,
        model=model,
        hyperparameters=hyperparams,
        suffix="custom-classifier",  # Model will be: gpt-4o-mini-...-custom-classifier
    )
    print(f"Fine-tuning job created: {job.id}")
    return job.id
def monitor_job(job_id: str) -> str:
    """Poll a fine-tuning job until it finishes; return the final model name.

    Prints status plus the five most recent events on each poll. Raises
    RuntimeError if the job fails or is cancelled.
    """
    import time

    while True:
        job = client.fine_tuning.jobs.retrieve(job_id)
        print(f"Status: {job.status}")
        # Events arrive newest-first; reverse so they print chronologically.
        recent = client.fine_tuning.jobs.list_events(job_id, limit=5)
        for event in reversed(recent.data):
            print(f"  {event.message}")
        if job.status == "succeeded":
            print(f"Fine-tuning complete! Model: {job.fine_tuned_model}")
            return job.fine_tuned_model
        if job.status in ("failed", "cancelled"):
            raise RuntimeError(f"Fine-tuning failed: {job.status}")
        time.sleep(60)
# Example: prepare customer service training data
# Example: prepare customer service training data
# Every example reuses the same system prompt so the fine-tuned model learns
# a single consistent persona; only the user/assistant turns vary.
training_data = [
    create_training_example(
        system="You are a concise customer support agent. Keep responses under 3 sentences.",
        user="How do I track my order?",
        assistant="Log into your account and navigate to 'Orders'. Click on your order to see real-time tracking. You'll also receive email updates at each shipping milestone.",
    ),
    create_training_example(
        system="You are a concise customer support agent. Keep responses under 3 sentences.",
        user="What's your return policy?",
        assistant="Items can be returned within 30 days of delivery in original condition. Initiate returns from your Orders page — we provide a free prepaid label. Refunds process within 5-7 business days.",
    ),
    # ... more examples (recommend 100-1000 for good results)
]
# Writes JSONL ready for launch_fine_tuning().
save_training_data(training_data, "training_data.jsonl")
For the Anthropic Claude API alternative with superior reasoning and longer context windows, see the Anthropic SDK guide for Claude integration patterns. For the Vercel AI SDK that unifies OpenAI and Anthropic under a single streaming interface, the Vercel AI SDK guide covers the React streaming patterns. The Claude Skills 360 bundle includes OpenAI skill sets covering Assistants API, Batch processing, and fine-tuning workflows. Start with the free tier to try OpenAI integration generation.