tablib manages tabular datasets with multi-format export. pip install tablib. Install formats: pip install tablib[all] (xlsx, ods, yaml, pandas). Basic: import tablib; data = tablib.Dataset(); data.headers = ["Name","Age"]; data.append(["Alice", 30]). From list: tablib.Dataset(*rows, headers=["Name","Age"]). Export: data.export("csv"). data.export("json"). data.export("yaml"). data.export("xlsx") → bytes. data.export("tsv"). data.export("df") → pandas DataFrame. Import: data = tablib.Dataset().load("name,age\nAlice,30"). tablib.import_set(csv_string, headers=True). Slice: data[0] → first row tuple. data["Name"] → column list. Filter: data.filter(lambda row: row[1] > 25). Append multiple: data.extend(rows). Sort: data.sort("Age"). data.reverse(). Dynamic col: data.add_column("Senior", col=lambda row: row["Age"] >= 65). Stack: data.stack(other_dataset). Wipe: data.wipe(). Height: data.height — row count. Width: data.width — col count. Databook: book = tablib.Databook(); book.add_sheet(data); book.export("xlsx"). Invalid: data.valid_rows / data.invalid_rows with validators. Claude Code generates tablib export pipelines, Databook multi-sheet workbooks, and format conversion utilities.
CLAUDE.md for tablib
## tablib Stack
- Version: tablib >= 3.5 | pip install "tablib[all]"
- Create: Dataset(*rows, headers=[...]) | dataset.append(row)
- Export: data.export("csv") | "json" | "yaml" | "xlsx" → bytes or str
- Import: tablib.Dataset().load(csv_str) | import_set(str)
- Databook: book = Databook(); book.add_sheet(ds); book.export("xlsx")
- Column: data["col_name"] → list | data.add_column("label", col=lambda r: ...)
tablib Multi-Format Data Pipeline
# app/data_export.py — tablib Dataset, Databook, format export, filter, and conversion
from __future__ import annotations
import io
import json
from typing import Any, Callable
import tablib
# ─────────────────────────────────────────────────────────────────────────────
# 1. Dataset builders
# ─────────────────────────────────────────────────────────────────────────────
def make_dataset(
    headers: list[str],
    rows: list[list | tuple] | None = None,
    title: str | None = None,
) -> tablib.Dataset:
    """
    Build a tablib Dataset with the given headers, optional rows, and title.

    The title becomes the sheet name when the dataset is exported into a
    multi-sheet workbook.

    Example:
        ds = make_dataset(["Name", "Score"], [["Alice", 95], ["Bob", 72]])
    """
    dataset = tablib.Dataset(headers=headers, title=title)
    if rows:
        for record in rows:
            dataset.append(list(record))
    return dataset
def dataset_from_dicts(
    records: list[dict[str, Any]],
    headers: list[str] | None = None,
    title: str | None = None,
) -> tablib.Dataset:
    """
    Build a Dataset from a list of dicts.

    headers: explicit column order; when falsy, the keys of the first
             record are used (empty when there are no records).

    Example:
        ds = dataset_from_dicts([{"name": "Alice", "score": 95}])
    """
    if records:
        cols = headers or list(records[0].keys())
    else:
        cols = headers or []
    dataset = tablib.Dataset(headers=cols, title=title)
    for record in records:
        # Missing keys become None so every row matches the header width.
        dataset.append([record.get(col) for col in cols])
    return dataset
def dataset_to_dicts(ds: tablib.Dataset) -> list[dict[str, Any]]:
    """Convert a Dataset into a list of per-row dicts keyed by header.

    NOTE(review): for a headerless dataset the keys are integer column
    positions, which does not match the declared dict[str, Any] — callers
    should set headers first.
    """
    keys = ds.headers
    if not keys:
        return [dict(enumerate(values)) for values in ds]
    return [dict(zip(keys, values)) for values in ds]
# ─────────────────────────────────────────────────────────────────────────────
# 2. Export helpers
# ─────────────────────────────────────────────────────────────────────────────
def to_csv(ds: tablib.Dataset) -> str:
    """Render the dataset as a CSV-formatted string."""
    # The `csv` format attribute is equivalent to ds.export("csv").
    return ds.csv
def to_tsv(ds: tablib.Dataset) -> str:
    """Render the dataset as a tab-separated string."""
    # The `tsv` format attribute is equivalent to ds.export("tsv").
    return ds.tsv
def to_json(ds: tablib.Dataset, indent: int | None = 2) -> str:
    """
    Export the dataset as a JSON string (list of row objects).

    indent: pretty-print indentation; pass None to keep tablib's
            compact single-line output.
    """
    compact = ds.export("json")
    if indent is None:
        return compact
    # tablib emits compact JSON, so round-trip through json to pretty-print.
    return json.dumps(json.loads(compact), indent=indent)
def to_xlsx(ds: tablib.Dataset) -> bytes:
    """Render the dataset as an Excel XLSX workbook (binary)."""
    # The `xlsx` format attribute is equivalent to ds.export("xlsx").
    return ds.xlsx
def to_yaml(ds: tablib.Dataset) -> str:
    """Render the dataset as YAML. Requires: pip install pyyaml"""
    # The `yaml` format attribute is equivalent to ds.export("yaml").
    return ds.yaml
def to_format(ds: tablib.Dataset, fmt: str) -> str | bytes:
    """
    Export the dataset to any registered tablib format by name.

    fmt: e.g. "csv", "tsv", "json", "yaml", "xlsx", "ods", "df".
    Returns str for text formats and bytes for binary ones ("df" yields a
    pandas DataFrame, outside the declared return type).
    """
    return ds.export(fmt)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Import helpers
# ─────────────────────────────────────────────────────────────────────────────
def from_csv(csv_str: str, headers: bool = True) -> tablib.Dataset:
    """
    Load a Dataset from a CSV string.

    headers: when True (default) the first row becomes the column headers;
             when False every row is loaded as data.
    """
    # Bug fix: the original assigned `ds.csv = csv_str`, which always treats
    # the first row as headers and silently ignored the `headers` argument.
    # Dataset.load forwards `headers` to the CSV importer and returns self.
    return tablib.Dataset().load(csv_str, format="csv", headers=headers)
def from_json(json_str: str) -> tablib.Dataset:
    """Load a Dataset from a JSON string (a list of flat objects)."""
    dataset = tablib.Dataset()
    # Assigning the `json` format attribute parses the string in place.
    dataset.json = json_str
    return dataset
def from_xlsx(xlsx_bytes: bytes) -> tablib.Dataset:
    """Load the first worksheet of an XLSX workbook into a Dataset."""
    dataset = tablib.Dataset()
    # Assigning the `xlsx` format attribute parses the bytes in place.
    dataset.xlsx = xlsx_bytes
    return dataset
def load_csv_file(path: str, encoding: str = "utf-8") -> tablib.Dataset:
    """Read a CSV file from disk and parse it into a Dataset."""
    from pathlib import Path

    text = Path(path).read_text(encoding=encoding)
    return from_csv(text)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Dataset operations
# ─────────────────────────────────────────────────────────────────────────────
def filter_rows(
    ds: tablib.Dataset,
    predicate: Callable[[dict], bool],
) -> tablib.Dataset:
    """
    Return a new Dataset keeping only rows for which the predicate is truthy.

    predicate receives each row as a dict mapping header name -> cell value.

    Example:
        active = filter_rows(ds, lambda r: r["status"] == "active")
    """
    cols = ds.headers or []
    filtered = tablib.Dataset(headers=ds.headers, title=ds.title)
    for values in ds:
        if predicate(dict(zip(cols, values))):
            filtered.append(list(values))
    return filtered
def add_computed_column(
    ds: tablib.Dataset,
    name: str,
    fn: Callable[[dict], Any],
) -> tablib.Dataset:
    """
    Return a copy of the dataset extended with one computed column.

    fn receives each row as a header-keyed dict and returns the new cell
    value; the source dataset is not modified.

    Example:
        ds2 = add_computed_column(ds, "total", lambda r: r["price"] * r["qty"])
    """
    cols = ds.headers or []
    extended = tablib.Dataset(headers=[*cols, name], title=ds.title)
    for values in ds:
        as_dict = dict(zip(cols, values))
        extended.append([*values, fn(as_dict)])
    return extended
def stack_datasets(*datasets: tablib.Dataset) -> tablib.Dataset:
    """
    Vertically concatenate Datasets that share the same headers.

    Headers and title come from the first dataset; calling with no
    arguments yields an empty Dataset.
    """
    if not datasets:
        return tablib.Dataset()
    first = datasets[0]
    combined = tablib.Dataset(headers=first.headers, title=first.title)
    for ds in datasets:
        combined.extend(list(values) for values in ds)
    return combined
def pivot_to_databook(
    records: list[dict[str, Any]],
    group_by: str,
    columns: list[str],
) -> tablib.Databook:
    """
    Group records by the `group_by` field and build one sheet per group.

    Records missing the group field fall into an "Other" sheet. Sheet names
    are truncated to Excel's 31-character limit; sheets are ordered by
    group key. Returns a Databook for multi-sheet Excel export.

    Example:
        book = pivot_to_databook(sales, "region", ["name", "revenue"])
        xlsx = book.export("xlsx")
    """
    from collections import defaultdict

    buckets: dict[str, list] = defaultdict(list)
    for record in records:
        key = str(record.get(group_by, "Other"))
        buckets[key].append([record.get(col) for col in columns])
    book = tablib.Databook()
    for key in sorted(buckets):
        sheet = tablib.Dataset(*buckets[key], headers=columns, title=key[:31])
        book.add_sheet(sheet)
    return book
# ─────────────────────────────────────────────────────────────────────────────
# 5. Multi-format export pipeline
# ─────────────────────────────────────────────────────────────────────────────
def export_all_formats(
    ds: tablib.Dataset,
    output_dir: str,
    formats: list[str] | None = None,
    base_name: str = "export",
) -> dict[str, str]:
    """
    Export a Dataset to each requested format, one file per format.

    output_dir is created if missing. Text exports are written UTF-8;
    binary exports (e.g. xlsx, ods) are written as bytes.

    formats: defaults to ["csv", "json", "xlsx"].
    Returns a mapping of format name -> written file path.
    """
    from pathlib import Path

    target = Path(output_dir)
    target.mkdir(parents=True, exist_ok=True)
    extensions = {"xlsx": "xlsx", "csv": "csv", "json": "json",
                  "yaml": "yaml", "tsv": "tsv", "ods": "ods"}
    written: dict[str, str] = {}
    for fmt in (formats or ["csv", "json", "xlsx"]):
        # Unknown formats fall back to using the format name as the extension.
        dest = target / f"{base_name}.{extensions.get(fmt, fmt)}"
        payload = ds.export(fmt)
        if isinstance(payload, bytes):
            dest.write_bytes(payload)
        else:
            dest.write_text(payload, encoding="utf-8")
        written[fmt] = str(dest)
    return written
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    from pathlib import Path

    def _demo() -> None:
        """Exercise the pipeline end to end: build, compute, filter, export."""
        sales_records = [
            {"name": "Alice Johnson", "region": "West", "revenue": 142500.0, "units": 48},
            {"name": "Bob Martinez", "region": "East", "revenue": 98750.0, "units": 35},
            {"name": "Carol Williams", "region": "Central", "revenue": 175200.0, "units": 60},
            {"name": "David Chen", "region": "North", "revenue": 61000.0, "units": 22},
            {"name": "Eve Thompson", "region": "South", "revenue": 119400.0, "units": 41},
            {"name": "Frank Lee", "region": "West", "revenue": 88200.0, "units": 30},
        ]

        print("=== Build dataset ===")
        sales_ds = dataset_from_dicts(sales_records, title="Sales")
        print(f" {sales_ds.height} rows × {sales_ds.width} cols")
        print(f" Headers: {sales_ds.headers}")

        print("\n=== Add computed column ===")
        with_price = add_computed_column(
            sales_ds, "avg_price",
            lambda r: round(r["revenue"] / r["units"], 2) if r["units"] else 0,
        )
        print(f" Headers now: {with_price.headers}")
        print(f" First row: {with_price[0]}")

        print("\n=== Filter rows ===")
        top_earners = filter_rows(sales_ds, lambda r: r["revenue"] >= 100_000)
        print(f" Revenue ≥ $100k: {top_earners.height} rows")
        for record in top_earners:
            print(f" {record[0]}: ${record[2]:,.0f}")

        print("\n=== Export CSV ===")
        print(to_csv(sales_ds)[:200])

        print("\n=== Export JSON ===")
        print(to_json(sales_ds)[:200])

        print("\n=== Export all formats ===")
        written = export_all_formats(sales_ds, "/tmp/tablib_demo", formats=["csv", "json", "xlsx"])
        for fmt, file_path in written.items():
            file_obj = Path(file_path)
            print(f" {fmt}: {file_obj.name} ({file_obj.stat().st_size:,} bytes)")

        print("\n=== Databook (pivot by region) ===")
        regional_book = pivot_to_databook(sales_records, "region", ["name", "revenue", "units"])
        workbook_bytes = regional_book.export("xlsx")
        Path("/tmp/tablib_regional.xlsx").write_bytes(workbook_bytes)
        print(f" Databook XLSX: {len(workbook_bytes):,} bytes (sheets: {len(regional_book.sheets())})")

        print("\n=== Stack datasets ===")
        west = filter_rows(sales_ds, lambda r: r["region"] == "West")
        east = filter_rows(sales_ds, lambda r: r["region"] == "East")
        merged = stack_datasets(west, east)
        print(f" Combined: {merged.height} rows")
        print(to_csv(merged))

    _demo()
For the pandas alternative — pandas DataFrames are the standard for in-memory tabular analysis with groupby, merge, and vectorized operations; tablib is lighter (~30 kB) with no NumPy dependency and focuses on format conversion — data.export("json") / "csv" / "xlsx" / "yaml" — making it ideal for web APIs that need to return data in multiple formats from a single in-memory representation. For the csv / json / openpyxl stdlib/direct alternative — using the individual format libraries requires separate code paths for each format; tablib provides a unified Dataset.export(fmt) interface so adding a new output format (e.g., switching from CSV to Excel) is a one-word change, and the Databook API enables multi-sheet Excel without learning XlsxWriter’s worksheet API. The Claude Skills 360 bundle includes tablib skill sets covering make_dataset()/dataset_from_dicts()/dataset_to_dicts(), to_csv/json/xlsx/yaml/tsv/format export helpers, from_csv/json/xlsx import helpers, filter_rows() predicate filter, add_computed_column(), stack_datasets() vertical concat, pivot_to_databook() multi-sheet Excel, and export_all_formats() batch exporter. Start with the free tier to try multi-format data export code generation.