sqlite-utils loads data into SQLite and queries it easily. pip install sqlite-utils. Open: from sqlite_utils import Database; db = Database("data.db"). Memory: db = Database(memory=True). Insert: db["people"].insert({"name":"Alice","age":30}). Insert many: db["people"].insert_all(rows). Upsert: db["people"].upsert({"id":1,"name":"Bob"}, pk="id"). upsert_all(rows, pk="id"). Auto-create: insert creates table from dict keys. Create explicit: db["t"].create({"id": int, "name": str, "score": float}, pk="id"). Alter (add col): db["t"].add_column("extra", str). Transform: db["t"].transform(rename={"old": "new"}, drop={"col"}). Query: db["t"].rows_where("age > ?", [25]). All rows: list(db["t"].rows). Count: db["t"].count. Search: db["t"].enable_fts(["name","bio"]); db["t"].search("Alice"). Index: db["t"].create_index(["email"], unique=True). FKs: db["orders"].add_foreign_key("user_id", "users", "id"). Index FKs: db.index_foreign_keys(). Execute: db.execute("SELECT * FROM t WHERE col=?", [val]). db.execute_returning_dicts(sql). Schema: db["t"].schema — CREATE TABLE SQL. Columns: db["t"].columns_dict. CLI: sqlite-utils insert data.db people data.json --pk id. sqlite-utils query data.db "SELECT * FROM people". Convert: db["t"].convert("col", lambda v: v.upper()). Claude Code generates sqlite-utils insert pipelines, FTS search, and JSON-to-SQLite ETL.
CLAUDE.md for sqlite-utils
## sqlite-utils Stack
- Version: sqlite-utils >= 3.35 | pip install sqlite-utils
- Open: Database("path.db") | Database(memory=True)
- Insert: db["table"].insert(dict) | insert_all(rows) — auto-creates schema
- Upsert: db["table"].upsert(row, pk="id") | upsert_all(rows, pk="id")
- Query: db["table"].rows_where("col > ?", [val]) | .rows (all)
- FTS: db["t"].enable_fts(["col"]); db["t"].search("query")
sqlite-utils Data Pipeline
# app/db_utils.py — sqlite-utils insert, upsert, FTS, transform, and query helpers
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Generator, Iterable
from sqlite_utils import Database
from sqlite_utils.db import Table
# ─────────────────────────────────────────────────────────────────────────────
# 1. Database factory
# ─────────────────────────────────────────────────────────────────────────────
def open_db(
    path: str | Path | None = None,
    wal: bool = True,
) -> Database:
    """
    Open (or create) a SQLite database and return its Database handle.

    path: location of the database file; None yields an in-memory database.
    wal:  when True, a file-backed database gets journal_mode=WAL and
          synchronous=NORMAL applied. Has no effect for in-memory databases,
          which are returned before the pragmas are considered.

    Example:
        db = open_db("data.db")
        db = open_db()  # in-memory
    """
    in_memory = path is None
    if in_memory:
        return Database(memory=True)

    handle = Database(str(path))
    if not wal:
        return handle

    # Order matters only for readability; both pragmas are always applied together.
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA synchronous=NORMAL"):
        handle.execute(pragma)
    return handle
def table(db: Database, name: str) -> Table:
    """Look up *name* on *db* via indexing and return the resulting table object."""
    tbl = db[name]
    return tbl  # type: ignore[return-value]
# ─────────────────────────────────────────────────────────────────────────────
# 2. Insert / upsert helpers
# ─────────────────────────────────────────────────────────────────────────────
def insert(
    db: Database,
    table_name: str,
    record: dict[str, Any],
    pk: str | None = None,
    alter: bool = True,
    replace: bool = False,
) -> Any:
    """
    Insert one record into *table_name* and return the table's last_pk.

    pk:      primary-key column; forwarded only when a truthy value is given.
    alter:   always forwarded to sqlite-utils (adds missing columns).
    replace: forwarded as replace=True only when set, so a conflicting
             primary key replaces the existing row.
    """
    # Optional settings are left out entirely unless the caller set them.
    options: dict = {
        "alter": alter,
        **({"pk": pk} if pk else {}),
        **({"replace": True} if replace else {}),
    }
    inserted = db[table_name].insert(record, **options)  # type: ignore[union-attr]
    return inserted.last_pk
def insert_all(
    db: Database,
    table_name: str,
    records: Iterable[dict[str, Any]],
    pk: str | None = None,
    batch_size: int = 1000,
    alter: bool = True,
) -> int:
    """
    Bulk insert records and return how many rows this call added.

    The result is the difference between the table's row count after and
    before the insert, so rows that were already present are not counted.
    An empty *records* iterable on a table that does not exist yet returns 0.

    pk:         primary-key column; forwarded only when a truthy value is given.
    batch_size: forwarded to sqlite-utils as the per-batch row count.
    alter:      forwarded to sqlite-utils (adds missing columns).
    """
    tbl = db[table_name]
    kwargs: dict = {"alter": alter, "batch_size": batch_size}
    if pk:
        kwargs["pk"] = pk

    # .count issues SELECT count(*), which fails on a table that has not been
    # created yet, so guard both reads with exists().
    rows_before = tbl.count if tbl.exists() else 0  # type: ignore[union-attr]
    tbl.insert_all(records, **kwargs)  # type: ignore[union-attr]
    rows_after = tbl.count if tbl.exists() else 0  # type: ignore[union-attr]
    return rows_after - rows_before
def upsert(
    db: Database,
    table_name: str,
    record: dict[str, Any],
    pk: str,
    alter: bool = True,
) -> Any:
    """
    Upsert a single record keyed on *pk* and return the table's last_pk.

    alter is forwarded to sqlite-utils unchanged.
    """
    outcome = db[table_name].upsert(record, pk=pk, alter=alter)  # type: ignore[union-attr]
    return outcome.last_pk
def upsert_all(
    db: Database,
    table_name: str,
    records: Iterable[dict[str, Any]],
    pk: str,
    batch_size: int = 1000,
    alter: bool = True,
) -> int:
    """
    Bulk upsert by pk and return the table's row count after the operation.

    Returns 0 when the table still does not exist afterwards (for example
    when *records* is empty and the table was never created), instead of
    failing on the count query.

    batch_size and alter are forwarded to sqlite-utils unchanged.
    """
    tbl = db[table_name]
    tbl.upsert_all(records, pk=pk, alter=alter, batch_size=batch_size)  # type: ignore[union-attr]
    # .count issues SELECT count(*), which fails on a table that was never created.
    if not tbl.exists():  # type: ignore[union-attr]
        return 0
    return tbl.count  # type: ignore[union-attr]
# ─────────────────────────────────────────────────────────────────────────────
# 3. Query helpers
# ─────────────────────────────────────────────────────────────────────────────
def query(
    db: Database,
    table_name: str,
    where: str | None = None,
    params: list | None = None,
    order_by: str | None = None,
    limit: int | None = None,
    offset: int | None = None,
    select: list[str] | None = None,
) -> list[dict[str, Any]]:
    """
    Query rows from a table with optional WHERE/ORDER/LIMIT/OFFSET.

    where:    SQL fragment placed after WHERE; use ? placeholders.
    params:   values bound to the placeholders in *where*.
    order_by: SQL fragment placed after ORDER BY.
    limit:    maximum number of rows; None means no limit.
    offset:   number of rows to skip. May be given without *limit*.
    select:   column names to return; joined with ", ". Default is all columns.

    Returns a list of row dicts.

    Example:
        rows = query(db, "users", "age > ? AND active = ?", [25, True], limit=10)
    """
    tbl = db[table_name]
    kwargs: dict = {}
    if where:
        kwargs["where"] = where
    if params:
        kwargs["where_args"] = params
    if order_by:
        kwargs["order_by"] = order_by
    if offset is not None and limit is None:
        # SQLite only accepts OFFSET after a LIMIT clause; a negative LIMIT
        # means "no upper bound", so this keeps offset-only queries valid.
        limit = -1
    if limit is not None:
        kwargs["limit"] = limit
    if offset is not None:
        kwargs["offset"] = offset
    if select:
        kwargs["select"] = ", ".join(select)
    return list(tbl.rows_where(**kwargs))  # type: ignore[union-attr]
def get_by_pk(db: Database, table_name: str, pk_value: Any) -> dict | None:
    """
    Fetch a single row by primary key.

    Returns the row dict, or None when sqlite-utils reports that no row
    matches (NotFoundError). Any other failure, such as a database error,
    propagates to the caller instead of being reported as "not found".
    """
    # Imported here so the helper stays self-contained.
    from sqlite_utils.db import NotFoundError

    try:
        return db[table_name].get(pk_value)  # type: ignore[union-attr]
    except NotFoundError:
        return None
def execute_sql(
    db: Database,
    sql: str,
    params: list | None = None,
) -> list[dict[str, Any]]:
    """
    Run a raw SQL statement and return every result row as a dict.

    params: values bound to the statement's placeholders; None or an empty
            list means no parameters.

    Keys come from the cursor's column descriptions. Statements that produce
    no result set return an empty list.
    """
    bound = params if params else []
    cur = db.execute(sql, bound)

    description = cur.description
    if description:
        names = [column[0] for column in description]
    else:
        names = []

    records = []
    for raw in cur.fetchall():
        records.append(dict(zip(names, raw)))
    return records
# ─────────────────────────────────────────────────────────────────────────────
# 4. Full-text search
# ─────────────────────────────────────────────────────────────────────────────
def enable_fts(
    db: Database,
    table_name: str,
    columns: list[str],
    tokenize: str = "porter ascii",
) -> None:
    """
    Turn on full-text search for *columns* of *table_name*.

    Does nothing when detect_fts() already reports an FTS table for this
    table, so repeated calls are safe.

    tokenize: tokenizer string forwarded to sqlite-utils, e.g.
              "porter ascii" (stemming), "unicode61" or "ascii".
    """
    tbl = db[table_name]
    already_indexed = tbl.detect_fts()  # type: ignore[union-attr]
    if already_indexed:
        return
    tbl.enable_fts(columns, tokenize=tokenize)  # type: ignore[union-attr]
def search(
    db: Database,
    table_name: str,
    query_str: str,
    limit: int = 20,
    columns: list[str] | None = None,
    quote: bool = False,
) -> list[dict[str, Any]]:
    """
    Full-text search over a table. Requires enable_fts() to have been called.

    query_str: the FTS query text.
    limit:     maximum number of rows returned.
    columns:   columns to include in each returned row (default: all
               columns). This does not restrict which columns are searched;
               that is fixed by the columns given to enable_fts().
    quote:     when True, ask sqlite-utils to quote the query so characters
               with special meaning in FTS syntax are treated literally.

    Returns a list of row dicts.

    Example:
        rows = search(db, "articles", "python asyncio", limit=10)
    """
    tbl = db[table_name]
    kwargs: dict = {"limit": limit}
    if columns:
        kwargs["columns"] = columns
    if quote:
        # Only forwarded when requested, so default calls are unchanged.
        kwargs["quote"] = True
    return list(tbl.search(query_str, **kwargs))  # type: ignore[union-attr]
# ─────────────────────────────────────────────────────────────────────────────
# 5. Schema & index helpers
# ─────────────────────────────────────────────────────────────────────────────
def create_index(
    db: Database,
    table_name: str,
    columns: list[str],
    unique: bool = False,
    if_not_exists: bool = True,
) -> None:
    """
    Build an index over *columns* of *table_name*.

    unique and if_not_exists are forwarded to sqlite-utils unchanged.
    """
    target = db[table_name]
    target.create_index(  # type: ignore[union-attr]
        columns,
        unique=unique,
        if_not_exists=if_not_exists,
    )
def get_schema(db: Database, table_name: str) -> str:
    """Return the schema attribute (the CREATE TABLE SQL) of *table_name*."""
    tbl = db[table_name]
    return tbl.schema  # type: ignore[union-attr]
def table_names(db: Database) -> list[str]:
    """List the names of the tables reported by *db*."""
    names = db.table_names()
    return names
def row_count(db: Database, table_name: str) -> int:
    """Return the count attribute (number of rows) of *table_name*."""
    tbl = db[table_name]
    return tbl.count  # type: ignore[union-attr]
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Walk-through of every helper against a throwaway in-memory database.
    db = open_db()  # in-memory

    print("=== Insert people ===")
    fields = ("id", "name", "age", "city", "active")
    people = [
        dict(zip(fields, values))
        for values in (
            (1, "Alice Johnson", 32, "New York", True),
            (2, "Bob Martinez", 27, "Chicago", True),
            (3, "Carol Williams", 45, "Seattle", False),
            (4, "David Chen", 29, "New York", True),
            (5, "Eve Thompson", 38, "Boston", True),
        )
    ]
    inserted = insert_all(db, "people", people, pk="id")
    print(f" Inserted: {inserted} rows")

    print("\n=== Query ===")
    ny_active = query(db, "people", "city = ? AND active = ?", ["New York", True])
    ny_names = [person["name"] for person in ny_active]
    print(f" NYC active: {ny_names}")
    over_30 = query(db, "people", "age > ?", [30], order_by="age", limit=5)
    name_age_pairs = [(person["name"], person["age"]) for person in over_30]
    print(f" Over 30: {name_age_pairs}")

    print("\n=== Upsert ===")
    updated_bob = {"id": 2, "name": "Bob Martinez", "age": 28, "city": "Chicago", "active": True}
    upsert(db, "people", updated_bob, pk="id")
    bob = get_by_pk(db, "people", 2)
    bob_age = bob["age"]
    print(f" Bob after upsert: age={bob_age}")

    print("\n=== FTS search ===")
    enable_fts(db, "people", ["name", "city"])
    hits = search(db, "people", "New York", limit=5)
    hit_names = [hit["name"] for hit in hits]
    print(f" FTS 'New York': {hit_names}")

    print("\n=== Index + Schema ===")
    create_index(db, "people", ["city"])
    all_tables = table_names(db)
    print(f" Tables: {all_tables}")
    total_people = row_count(db, "people")
    print(f" people count: {total_people}")

    print("\n=== Raw SQL ===")
    per_city = execute_sql(db, "SELECT city, COUNT(*) as n FROM people GROUP BY city ORDER BY n DESC")
    for entry in per_city:
        city, headcount = entry["city"], entry["n"]
        print(f" {city}: {headcount}")
Compared with SQLAlchemy: SQLAlchemy requires schema definitions (declarative models or Core Table objects) before inserting data, whereas sqlite-utils accepts plain Python dicts and creates the table schema automatically from their keys — ideal for rapid ETL pipelines, data journalism, and ad-hoc exploration where the schema is unknown upfront. Compared with the stdlib sqlite3 module: sqlite3 requires writing raw SQL for every operation, whereas sqlite-utils wraps it with a higher-level API (insert_all(rows, pk="id")) and adds automatic schema creation, FTS5 search, create_index(), and a powerful CLI (sqlite-utils insert db.db table data.json) — use sqlite3 for simple queries, and sqlite-utils when you need to load and query structured data quickly. The Claude Skills 360 bundle includes sqlite-utils skill sets covering open_db() with WAL mode, insert()/insert_all()/upsert()/upsert_all() with alter, query() with where/order/limit, get_by_pk()/execute_sql(), the enable_fts()/search() FTS5 pipeline, create_index()/get_schema(), and CLI usage for insert/query/convert. Start with the free tier to try SQLite data pipeline code generation.