Pinecone is a managed vector database for AI semantic search — new Pinecone({ apiKey }) creates the client. pinecone.createIndex({ name, dimension: 1536, metric: "cosine", spec: { serverless: { cloud: "aws", region: "us-east-1" } } }) creates an index. const index = pinecone.index("my-index") gets the index handle. index.upsert([{ id, values: embedding, metadata: { title, url, text } }]) stores vectors. index.query({ vector: queryEmbedding, topK: 10, includeMetadata: true }) returns the nearest neighbors. Namespaces: index.namespace("user-123").upsert(...) for tenant isolation. Filters: filter: { category: { $eq: "docs" }, score: { $gte: 0.8 } } in query. index.fetch(["id1", "id2"]) retrieves exact vectors. index.deleteMany({ metadata: { userId: "user-123" } }) removes by filter. index.describeIndexStats() returns per-namespace counts. Hybrid search: combine dense + sparse vectors with sparseValues. Claude Code generates Pinecone RAG pipelines, semantic search APIs, and document retrieval systems.
CLAUDE.md for Pinecone
## Pinecone Stack
- Version: @pinecone-database/pinecone >= 4.0
- Init: const pc = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! })
- Index: const index = pc.index(process.env.PINECONE_INDEX_NAME!)
- Upsert: await index.upsert([{ id: uuid, values: float32Array, metadata: { text, source, userId } }])
- Query: const { matches } = await index.query({ vector: queryEmbedding, topK: 5, includeMetadata: true, filter: { userId: { $eq: userId } } })
- Delete: await index.deleteMany({ filter: { docId: { $eq: docId } } })
- Namespace: index.namespace(tenantId).upsert(...) — isolate data per tenant
Embedding Utility
// lib/ai/embeddings.ts — OpenAI embeddings with batching
import OpenAI from "openai"
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
// Embedding model and its output dimension (1536 for text-embedding-3-small).
// The Pinecone index must be created with this same dimension — see the createIndex note above.
export const EMBEDDING_MODEL = "text-embedding-3-small"
export const EMBEDDING_DIMENSION = 1536
/**
 * Embed a single piece of text with the configured OpenAI model.
 * Newline runs are collapsed to single spaces before the API call.
 */
export async function embedText(text: string): Promise<number[]> {
  const normalized = text.trim().replace(/\n+/g, " ")
  const { data } = await openai.embeddings.create({
    model: EMBEDDING_MODEL,
    input: normalized,
  })
  return data[0].embedding
}
/**
 * Embed many texts, `batchSize` at a time, preserving input order.
 * The API may return items out of order, so each batch is re-sorted by `index`
 * before its embeddings are appended.
 */
export async function embedBatch(texts: string[], batchSize = 100): Promise<number[][]> {
  const all: number[][] = []
  for (let start = 0; start < texts.length; start += batchSize) {
    const input = texts
      .slice(start, start + batchSize)
      .map((t) => t.trim().replace(/\n+/g, " "))
    const { data } = await openai.embeddings.create({ model: EMBEDDING_MODEL, input })
    const ordered = [...data].sort((x, y) => x.index - y.index)
    for (const item of ordered) {
      all.push(item.embedding)
    }
  }
  return all
}
/** Split text into overlapping chunks for better embedding coverage */
/**
 * Split text into overlapping word-based chunks for embedding coverage.
 *
 * @param text - Raw input; split on whitespace. Blank/whitespace-only input yields [].
 * @param chunkSize - Maximum number of words per chunk; must exceed `overlap`.
 * @param overlap - Number of words shared between consecutive chunks.
 * @returns Chunk strings in document order.
 * @throws RangeError when overlap >= chunkSize (the loop could not make progress).
 */
export function chunkText(text: string, chunkSize = 800, overlap = 100): string[] {
  if (overlap >= chunkSize) {
    throw new RangeError(`overlap (${overlap}) must be smaller than chunkSize (${chunkSize})`)
  }
  // Trim first: "".split(/\s+/) is [""], which previously produced a bogus [""] result.
  const trimmed = text.trim()
  const words = trimmed.length ? trimmed.split(/\s+/) : []
  const chunks: string[] = []
  for (let i = 0; i < words.length; i += chunkSize - overlap) {
    chunks.push(words.slice(i, i + chunkSize).join(" "))
    // Once a chunk reaches the end of the text, stop: another iteration would
    // emit a trailing chunk that is a pure subset of this one's tail.
    if (i + chunkSize >= words.length) break
  }
  return chunks
}
Document Indexing Service
// lib/ai/pinecone.ts — document upsert and semantic search
import { Pinecone, type RecordMetadata } from "@pinecone-database/pinecone"
import { randomUUID } from "crypto"
import { embedText, embedBatch, chunkText } from "./embeddings"
// Module-level Pinecone client; PINECONE_API_KEY must be set (non-null asserted at startup).
const pc = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! })
// Target index name; falls back to "knowledge-base" when PINECONE_INDEX_NAME is unset.
const INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? "knowledge-base"
/** Metadata stored with every chunk vector; returned by queries with includeMetadata. */
export type DocMetadata = {
docId: string
// 0-based position of this chunk within its parent document
chunkIndex: number
// raw chunk text — kept so search results can render excerpts without a second lookup
text: string
title: string
source: string
// indexDocument writes "" when the document has no owner; optional on reads
userId?: string
// indexDocument defaults this to "general"
category?: string
// ISO-8601 timestamp captured at indexing time
createdAt: string
}
/** One query match: Pinecone record id, similarity score, and its stored metadata. */
export type SearchResult = {
id: string
score: number
metadata: DocMetadata
}
/** Index a document by splitting into chunks and upserting all chunks */
/**
 * Index a document: split it into overlapping chunks, embed every chunk, and
 * upsert the vectors (optionally into a tenant namespace).
 *
 * Record ids are "<docId>#<chunkIndex>" so all chunks of a document share a prefix.
 *
 * @returns The number of chunks written.
 */
export async function indexDocument(doc: {
  docId: string
  title: string
  content: string
  source: string
  userId?: string
  category?: string
  namespace?: string
}): Promise<number> {
  const chunks = chunkText(doc.content)
  const embeddings = await embedBatch(chunks)
  // One timestamp for the whole document — previously each chunk called
  // new Date().toISOString() separately, so chunks of one doc could disagree.
  const createdAt = new Date().toISOString()
  const vectors = chunks.map((text, i) => ({
    id: `${doc.docId}#${i}`,
    values: embeddings[i],
    metadata: {
      docId: doc.docId,
      chunkIndex: i,
      text,
      title: doc.title,
      source: doc.source,
      userId: doc.userId ?? "",
      category: doc.category ?? "general",
      createdAt,
    } satisfies DocMetadata,
  }))
  const index = pc.index<DocMetadata>(INDEX_NAME)
  const ns = doc.namespace ? index.namespace(doc.namespace) : index
  // Upsert in batches of 100 (Pinecone caps records per upsert call).
  for (let i = 0; i < vectors.length; i += 100) {
    // NOTE(review): `as any` silences a metadata-type mismatch with the SDK's
    // RecordMetadata constraint — worth tightening against the installed SDK version.
    await ns.upsert(vectors.slice(i, i + 100) as any)
  }
  return chunks.length
}
/** Semantic search — returns top-k matching chunks */
/**
 * Semantic search: embeds `query`, runs a top-k vector query (optionally within
 * a namespace and/or metadata filter), and keeps matches scoring at least
 * `minScore`. Scores come straight from Pinecone (cosine similarity assuming
 * the index uses metric "cosine" — confirm against the index config).
 */
export async function semanticSearch(
  query: string,
  options: {
    topK?: number
    namespace?: string
    filter?: Record<string, unknown>
    minScore?: number
  } = {},
): Promise<SearchResult[]> {
  const { topK = 8, namespace, filter, minScore = 0.5 } = options
  const queryVector = await embedText(query)
  const index = pc.index<DocMetadata>(INDEX_NAME)
  const ns = namespace ? index.namespace(namespace) : index
  const { matches } = await ns.query({
    vector: queryVector,
    topK,
    includeMetadata: true,
    // `filter` is already Record<string, unknown> | undefined — the old
    // `as Record<string, unknown>` cast was redundant.
    filter,
  })
  return (matches ?? [])
    .filter((m) => (m.score ?? 0) >= minScore)
    .map((m) => ({
      id: m.id,
      score: m.score ?? 0,
      // NOTE(review): trusts that stored metadata matches DocMetadata — unchecked cast.
      metadata: m.metadata as DocMetadata,
    }))
}
/** Delete all vectors for a document */
export async function deleteDocument(docId: string, namespace?: string): Promise<void> {
const index = pc.index<DocMetadata>(INDEX_NAME)
const ns = namespace ? index.namespace(namespace) : index
await ns.deleteMany({ filter: { docId: { $eq: docId } } })
}
/** Get index stats per namespace */
export async function getIndexStats() {
const index = pc.index(INDEX_NAME)
return index.describeIndexStats()
}
Semantic Search API Route
// app/api/search/route.ts — Next.js semantic search endpoint
import { NextResponse } from "next/server"
import { z } from "zod"
import { semanticSearch } from "@/lib/ai/pinecone"
import { auth } from "@/lib/auth"
// Request-body contract: short free-text query, optional category filter, bounded topK.
const SearchSchema = z.object({
query: z.string().min(2).max(500),
category: z.string().optional(),
// capped at 20 to bound per-request embedding + query cost
topK: z.number().int().min(1).max(20).default(8),
})
/**
 * POST /api/search — semantic search endpoint.
 *
 * Validates the body against SearchSchema, scopes results to the caller's
 * namespace and userId when a session exists, de-duplicates hits by docId
 * (matches arrive score-ordered, so the first chunk seen per doc is its best),
 * and returns trimmed excerpts.
 *
 * Responses: 200 with results, 400 for malformed JSON or schema violations,
 * 500 for anything else.
 */
export async function POST(req: Request) {
  try {
    const session = await auth()
    // Malformed JSON is a client error — previously the SyntaxError from
    // req.json() fell through to the generic 500 handler.
    let body: unknown
    try {
      body = await req.json()
    } catch {
      return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 })
    }
    const { query, category, topK } = SearchSchema.parse(body)
    const filter: Record<string, unknown> = {}
    if (category) filter.category = { $eq: category }
    // NOTE(review): indexDocument stores userId as "" when absent — confirm
    // "public" is ever actually written, otherwise this $in arm never matches.
    if (session?.user?.id) filter.userId = { $in: [session.user.id, "public"] }
    const results = await semanticSearch(query, {
      topK,
      filter: Object.keys(filter).length ? filter : undefined,
      namespace: session?.user?.id,
    })
    // De-duplicate by docId — keep only the best chunk per document
    const seen = new Set<string>()
    const deduplicated = results.filter((r) => {
      if (seen.has(r.metadata.docId)) return false
      seen.add(r.metadata.docId)
      return true
    })
    return NextResponse.json({
      results: deduplicated.map((r) => ({
        id: r.metadata.docId,
        title: r.metadata.title,
        excerpt: r.metadata.text.slice(0, 280) + (r.metadata.text.length > 280 ? "…" : ""),
        source: r.metadata.source,
        category: r.metadata.category,
        score: Math.round(r.score * 1000) / 1000,
      })),
      total: deduplicated.length,
    })
  } catch (err) {
    if (err instanceof z.ZodError) {
      return NextResponse.json({ error: "Invalid request", issues: err.issues }, { status: 400 })
    }
    console.error("[Search API]", err)
    return NextResponse.json({ error: "Search failed" }, { status: 500 })
  }
}
For the Weaviate alternative when a self-hostable open-source vector database, hybrid BM25+vector search, built-in object storage with GraphQL schema, and multi-tenancy at the class level are preferred — Weaviate is fully open source and can run on-prem while Pinecone is fully managed with no infrastructure to operate, see the Weaviate guide. For the Chroma alternative when an embedded, in-process vector store (no separate server required) for development, testing, or small self-contained AI apps is needed — Chroma can run as a Python or JS in-memory store while Pinecone handles billions of vectors at production scale with sub-millisecond query latency, see the Chroma guide. The Claude Skills 360 bundle includes Pinecone skill sets covering RAG pipelines, namespace isolation, and semantic search APIs. Start with Pinecone's free tier to try vector search in your own application.