Chroma is an embedded open-source vector store ideal for development and small deployments — new ChromaClient() connects to a running Chroma server (default localhost:8000). (The JS/TS client always connects to a server over HTTP; in-memory ephemeral mode for tests is available via the Python client's chromadb.EphemeralClient().) await client.createCollection({ name, embeddingFunction: new OpenAIEmbeddingFunction({ apiKey }) }) creates a collection. await collection.add({ ids: ["id1"], documents: ["text content"], metadatas: [{ source: "docs" }] }) inserts with auto-embedding. await collection.query({ queryTexts: ["search query"], nResults: 5 }) returns nearest neighbors. where: { source: { $eq: "official-docs" } } filters metadata. where_document: { $contains: "keyword" } filters by document content. await collection.update({ ids: ["id1"], documents: ["new text"] }) updates. await collection.delete({ ids: ["id1"] }) removes entries. collection.peek() inspects first 10 items. Claude Code generates Chroma RAG pipelines, document Q&A, and lightweight similarity search.
CLAUDE.md for Chroma
## Chroma Stack
- Version: chromadb >= 1.9 (JS/TS client for Chroma server)
- Start server: docker run -p 8000:8000 chromadb/chroma (or pip install chromadb && chroma run)
- Init: const client = new ChromaClient({ path: "http://localhost:8000" })
- Collection: const col = await client.getOrCreateCollection({ name: "docs", embeddingFunction: new OpenAIEmbeddingFunction({ openai_api_key: key }) })
- Add: await col.add({ ids, documents, metadatas })
- Query: const results = await col.query({ queryTexts: [query], nResults: 8, where: { category: "tech" } })
- Delete: await col.delete({ where: { docId: { $eq: docId } } })
- Count: await col.count()
Chroma Client and Collections
// lib/chroma/client.ts — ChromaDB client with OpenAI embeddings
import { ChromaClient, OpenAIEmbeddingFunction, type Collection } from "chromadb"
// Memoized singletons — one client and one collection handle per server process.
let _client: ChromaClient | null = null
let _collection: Collection | null = null
// Collection name is overridable via env so each environment (dev/stage/prod)
// can target its own collection on a shared Chroma server.
export const COLLECTION_NAME = process.env.CHROMA_COLLECTION ?? "knowledge-base"
/**
 * Lazily construct and memoize the shared ChromaClient.
 * Connection target comes from CHROMA_URL, defaulting to a local server.
 */
export function getChromaClient(): ChromaClient {
  _client ??= new ChromaClient({
    path: process.env.CHROMA_URL ?? "http://localhost:8000",
  })
  return _client
}
/**
 * Build the OpenAI embedding function used for both ingestion and queries.
 *
 * Fails fast with a descriptive error when OPENAI_API_KEY is unset — the
 * original non-null assertion (`!`) silently passed `undefined`, which only
 * surfaced later as an opaque authentication failure from the OpenAI API.
 */
export function getEmbeddingFunction() {
  const apiKey = process.env.OPENAI_API_KEY
  if (!apiKey) {
    throw new Error("OPENAI_API_KEY is not set — required for Chroma embeddings")
  }
  return new OpenAIEmbeddingFunction({
    openai_api_key: apiKey,
    openai_model: "text-embedding-3-small",
  })
}
/**
 * Return the memoized collection, creating it on first use.
 * Configured with cosine distance for the HNSW index.
 */
export async function getCollection(): Promise<Collection> {
  if (!_collection) {
    _collection = await getChromaClient().getOrCreateCollection({
      name: COLLECTION_NAME,
      embeddingFunction: getEmbeddingFunction(),
      // cosine similarity for the underlying HNSW index
      metadata: { "hnsw:space": "cosine" },
    })
  }
  return _collection
}
/** Drop and recreate the collection (useful in tests). */
export async function resetCollection(): Promise<Collection> {
  try {
    await getChromaClient().deleteCollection({ name: COLLECTION_NAME })
  } catch {
    // best-effort delete — the collection may simply not exist yet
  }
  // Clear the memoized handle so getCollection() recreates it.
  _collection = null
  return getCollection()
}
Document Store
// lib/chroma/store.ts — add, query, and delete documents
import { getCollection } from "./client"
import { chunkText } from "@/lib/ai/embeddings"
// Metadata stored alongside every chunk in Chroma. All values are flat
// scalars (string/number), matching Chroma's metadata constraints.
type DocMeta = {
docId: string // parent document id — shared by all of its chunks
chunkIndex: number // position of this chunk within the document
title: string
source: string
category: string // defaults to "general" at ingest time
userId: string // owner id, or "public" for unowned documents
}
// One search result: a chunk plus its metadata and query distance
// (lower distance = more similar under the collection's cosine space).
export type SearchHit = {
id: string
text: string
metadata: DocMeta
distance: number
}
/**
 * Ingest a document — chunks the content and lets Chroma's embedding
 * function embed each chunk on insert.
 *
 * @returns number of chunks stored (0 when the content yields no chunks)
 */
export async function addDocument(doc: {
  docId: string
  title: string
  content: string
  source: string
  category?: string
  userId?: string
}): Promise<number> {
  const collection = await getCollection()
  const chunks = chunkText(doc.content)
  // Guard: Chroma rejects add() calls with empty id/document arrays,
  // so empty or whitespace-only content is a no-op rather than an error.
  if (chunks.length === 0) return 0
  // Chunk ids are "<docId>#<index>" so deleteDocument can find them by docId.
  const ids = chunks.map((_, i) => `${doc.docId}#${i}`)
  const metadatas: DocMeta[] = chunks.map((_, i) => ({
    docId: doc.docId,
    chunkIndex: i,
    title: doc.title,
    source: doc.source,
    category: doc.category ?? "general",
    userId: doc.userId ?? "public",
  }))
  // Chroma accepts batches — add all chunks at once. DocMeta is a type alias
  // of scalar-valued fields, so it satisfies Chroma's Metadata shape directly
  // (no `as any` cast needed).
  await collection.add({
    ids,
    documents: chunks,
    metadatas,
  })
  return chunks.length
}
/**
 * Semantic search with optional metadata filters.
 *
 * When `userId` is provided, matches that user's documents OR public ones in
 * a single query using Chroma's logical `$or` operator — the previous
 * implementation claimed `$or` was unsupported and issued two queries plus a
 * client-side merge, which was both incorrect (Chroma supports `$or`/`$and`
 * in `where` filters) and a wasted round trip. Results are returned per-chunk,
 * consistent with the unfiltered path.
 */
export async function searchDocuments(
  query: string,
  options: {
    nResults?: number
    category?: string
    userId?: string
  } = {},
): Promise<SearchHit[]> {
  const { nResults = 8, category, userId } = options
  const collection = await getCollection()
  // Collect individual metadata conditions; combine with $and when >1,
  // since Chroma expects a single top-level operator or field clause.
  const conditions: Record<string, unknown>[] = []
  if (category) conditions.push({ category: { $eq: category } })
  if (userId) {
    conditions.push({
      $or: [{ userId: { $eq: userId } }, { userId: { $eq: "public" } }],
    })
  }
  const where =
    conditions.length === 0
      ? undefined
      : conditions.length === 1
        ? conditions[0]
        : { $and: conditions }
  const result = await collection.query({
    queryTexts: [query],
    nResults,
    ...(where ? { where } : {}),
  })
  return formatResults(result)
}
/**
 * Flatten Chroma's parallel-array query response into SearchHit objects.
 * Chroma returns one outer entry per query text; we always issue a single
 * query, so index [0] holds the only result set.
 */
function formatResults(result: Awaited<ReturnType<Awaited<ReturnType<typeof getCollection>>["query"]>>): SearchHit[] {
  const ids = result.ids[0] ?? []
  const docs = result.documents[0] ?? []
  const metas = result.metadatas[0] ?? []
  const dists = result.distances?.[0] ?? []
  return ids.map((id, i) => ({
    id,
    text: docs[i] ?? "",
    metadata: metas[i] as unknown as DocMeta,
    // missing distance defaults to 1 (worst cosine distance)
    distance: dists[i] ?? 1,
  }))
}
/**
 * Merge two query result sets, keep only the closest chunk per document,
 * and return at most `limit` hits ordered by ascending distance.
 */
function mergeAndDedup(
  _query: string,
  a: Parameters<typeof formatResults>[0],
  b: Parameters<typeof formatResults>[0],
  limit: number,
): SearchHit[] {
  const byDistance = [...formatResults(a), ...formatResults(b)].sort(
    (lhs, rhs) => lhs.distance - rhs.distance,
  )
  const seenDocs = new Set<string>()
  const unique: SearchHit[] = []
  for (const hit of byDistance) {
    if (seenDocs.has(hit.metadata.docId)) continue
    seenDocs.add(hit.metadata.docId)
    unique.push(hit)
    if (unique.length === limit) break
  }
  return unique
}
/**
 * Delete all chunks for a document.
 *
 * Uses collection.delete's `where` filter directly (as documented in this
 * project's CLAUDE.md) — the previous implementation first fetched every
 * matching id with get() and then deleted by id, costing an extra round trip
 * for no benefit. delete() with a non-matching filter is a harmless no-op.
 */
export async function deleteDocument(docId: string): Promise<void> {
  const collection = await getCollection()
  await collection.delete({ where: { docId: { $eq: docId } } })
}
/**
 * Get collection statistics: total item count plus a small sample of ids.
 * count() and peek() are independent reads, so run them in parallel instead
 * of awaiting them sequentially.
 */
export async function getStats() {
  const collection = await getCollection()
  const [count, sample] = await Promise.all([collection.count(), collection.peek()])
  return { count, sampleIds: sample.ids }
}
RAG Q&A API Route
// app/api/qa/route.ts — RAG question answering with Chroma + OpenAI
import { NextResponse } from "next/server"
import { z } from "zod"
import OpenAI from "openai"
import { searchDocuments } from "@/lib/chroma/store"
import { auth } from "@/lib/auth"
// Shared OpenAI client for answer generation (embeddings are handled by
// Chroma's embedding function in lib/chroma/client.ts).
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
// Request body schema — question length bounded to keep prompt size predictable.
const QASchema = z.object({
question: z.string().min(3).max(600),
category: z.string().optional(),
})
export async function POST(req: Request) {
const session = await auth()
const body = await req.json()
const { question, category } = QASchema.parse(body)
// 1. Retrieve relevant chunks
const hits = await searchDocuments(question, {
nResults: 5,
category,
userId: session?.user?.id,
})
if (hits.length === 0) {
return NextResponse.json({ answer: "I don't have information about that.", sources: [] })
}
// 2. Build context from top hits
const context = hits
.map((h, i) => `[${i + 1}] ${h.text}`)
.join("\n\n")
// 3. Generate answer
const completion = await openai.chat.completions.create({
model: "gpt-4o-mini",
messages: [
{
role: "system",
content: "You are a helpful assistant. Answer based only on the provided context. If the context doesn't contain the answer, say so.",
},
{
role: "user",
content: `Context:\n${context}\n\nQuestion: ${question}`,
},
],
temperature: 0.2,
max_tokens: 512,
})
const answer = completion.choices[0].message.content ?? "No answer generated."
const sources = [...new Set(hits.map((h) => h.metadata.source))]
return NextResponse.json({ answer, sources, hitCount: hits.length })
}
For the Pinecone alternative when managed, serverless infrastructure with billion-vector scale, sub-millisecond query latency, namespace-based multitenancy, and no infrastructure to operate is required — Pinecone is purpose-built for production AI applications at scale, while Chroma is ideal for development, prototyping, and small deployments up to a few million vectors — see the Pinecone guide. For the Weaviate alternative when self-hosted production deployment, GraphQL schema, built-in BM25+vector hybrid search, generative search modules, and multi-tenancy at the class level are needed — Weaviate is the open-source vector database for complex production use cases, while Chroma is simpler and better for developer-first RAG prototypes — see the Weaviate guide. The Claude Skills 360 bundle includes Chroma skill sets covering document ingestion, RAG pipelines, and Q&A APIs. Start with the free tier to try embedded vector search.