glom accesses, transforms, and validates nested Python data structures. pip install glom. Basic: from glom import glom; glom(data, "a.b.c") — dot-path into nested dicts. glom(data, ("a", "b")) — tuple path. Path: from glom import Path; glom(data, Path("a", 0, "name")). Default: glom(data, "x.y", default=None). Coalesce: from glom import Coalesce; glom(data, Coalesce("a.b", "a.c", default="")). Assign: from glom import Assign; glom(data, Assign("a.b", 42)). T: from glom import T; glom(data, T.upper()) — call methods. Iter: from glom import Iter; glom(data, ("items", Iter("name").all())). Merge: from glom import Merge; glom([{"a":1}, {"b":2}], Merge()). Match/Check: from glom import Match, Check; glom(data, Match({"name": str, "age": Check(type=int, validate=lambda v: v > 0)})). Fill: from glom import Fill; glom(data, Fill({"id": T["user"]["id"], "label": T["user"]["name"].upper()})) — in Fill mode plain strings are literal values, so use T for lookups. Auto: from glom import Auto; Auto(spec) — switch back to the default path-string mode inside a Fill or Match spec. Flatten: from glom import flatten; flatten([[1,2],[3]]). Register: from glom import register; register(MyType, get=getattr, iterate=iter). glom(spec=...). Auto-spec via glom.glom. Claude Code generates glom extraction pipelines, nested API response parsers, and data reshaping utilities.
CLAUDE.md for glom
## glom Stack
- Version: glom >= 23.5 | pip install glom
- Access: glom(data, "a.b.c") | glom(data, ("a", 0, "name"))
- Default: glom(data, "x.y", default=None)
- Coalesce: Coalesce("path1", "path2", default=val) — first non-missing wins
- Assign: Assign("a.b", value) — deep mutation
- T: glom(obj, T.method().attr) — method chaining on nested values
- Iter: glom(data, ("list_key", Iter("field").all())) — transform nested lists
glom Nested Data Pipeline
# app/glom_utils.py — glom path access, coalesce, assign, iter, match, fill, merge
from __future__ import annotations
from typing import Any
from glom import (
Assign,
Check,
Coalesce,
Fill,
Iter,
Match,
Merge,
Path,
T,
TType,
flatten,
glom,
)
# ─────────────────────────────────────────────────────────────────────────────
# 1. Core access helpers
# ─────────────────────────────────────────────────────────────────────────────
def get(obj: Any, path: str, default: Any = None) -> Any:
    """
    Look up a nested value in ``obj`` using a dot-separated path.

    The lookup is handed to ``glom`` together with ``default``, so a path
    that cannot be resolved yields ``default`` rather than an error.

    Example:
        get({"a": {"b": 42}}, "a.b")  → 42
        get({"a": {}}, "a.b")         → None
    """
    resolved = glom(obj, path, default=default)
    return resolved
def get_path(obj: Any, *keys: str | int, default: Any = None) -> Any:
    """
    Look up a nested value by giving each step of the path as its own argument.

    The keys are wrapped in a glom ``Path``, so string keys and integer
    list indexes can be mixed freely. ``default`` is passed through to
    ``glom`` as the fallback value.

    Example:
        get_path(data, "users", 0, "email")
    """
    key_sequence = Path(*keys)
    return glom(obj, key_sequence, default=default)
def coalesce(obj: Any, *paths: str, default: Any = None) -> Any:
    """
    Try each path in order and return the first one that resolves.

    The paths are combined into a single glom ``Coalesce`` spec that carries
    ``default`` as its fallback value.

    Example:
        coalesce(r, "user.display_name", "user.username", "user.id")
    """
    fallback_chain = Coalesce(*paths, default=default)
    return glom(obj, fallback_chain)
def pluck(obj: Any, *paths: str, default: Any = None) -> dict[str, Any]:
    """
    Read several dot-paths from obj and return them as one flat dict.

    Each result is keyed by the path string exactly as it was passed in;
    a path that cannot be resolved maps to ``default``.

    Example:
        pluck(user, "id", "profile.email", "settings.theme")
        → {"id": 1, "profile.email": "alice@example.com", "settings.theme": "dark"}
    """
    extracted: dict[str, Any] = {}
    for dotted in paths:
        extracted[dotted] = get(obj, dotted, default=default)
    return extracted
# ─────────────────────────────────────────────────────────────────────────────
# 2. Mutation helpers
# ─────────────────────────────────────────────────────────────────────────────
def assign(obj: Any, path: str, value: Any) -> Any:
    """
    Write value at the dot-path inside obj, mutating obj in place.

    The same obj is returned so calls can be chained.

    Example:
        assign(config, "database.pool_size", 10)
    """
    setter = Assign(path, value)
    glom(obj, setter)
    return obj
def assign_many(obj: Any, updates: dict[str, Any]) -> Any:
    """
    Apply every path → value pair in updates to obj, one after another.

    Assignments run in the dict's iteration order and mutate obj in place;
    the same obj is returned.
    """
    for target_path in updates:
        assign(obj, target_path, updates[target_path])
    return obj
# ─────────────────────────────────────────────────────────────────────────────
# 3. List / Iter transformations
# ─────────────────────────────────────────────────────────────────────────────
def pluck_list(items: list[Any], path: str) -> list[Any]:
    """
    Read the same dot-path from every element of items.

    Elements where the path cannot be resolved contribute None, because
    the lookup goes through get() with its default fallback.

    Example:
        pluck_list(users, "email") → ["alice@example.com", "bob@example.com", ...]
    """
    values: list[Any] = []
    for element in items:
        values.append(get(element, path))
    return values
def collect(obj: Any, list_path: str, field_path: str) -> list[Any]:
    """
    Walk to the list at list_path, then gather field_path from each element.

    Example:
        collect(data, "orders", "total") → [9.99, 19.99, ...]
    """
    # Iter(...).all() applies the field lookup per element and materialises a list.
    per_element = Iter(field_path).all()
    return glom(obj, (list_path, per_element))
def transform_list(obj: Any, list_path: str, spec: Any) -> list[Any]:
    """
    Run an arbitrary glom spec against each element of the list at list_path.

    Returns the per-element results as a list.
    """
    element_pipeline = Iter(spec).all()
    chained_spec = (list_path, element_pipeline)
    return glom(obj, chained_spec)
def reshape(items: list[dict], mapping: dict[str, str]) -> list[dict]:
    """
    Build a new list of dicts whose keys are remapped from each source dict.

    mapping: {new_key: source_path, ...} — every source_path is a dot-path
    read from the item; an unresolvable path yields None.

    Example:
        reshape(rows, {"name": "first_name", "email": "contact.email"})
    """
    return [
        {target_key: get(row, source_path) for target_key, source_path in mapping.items()}
        for row in items
    ]
def flatten_nested(obj: Any, path: str) -> list[Any]:
    """
    Fetch the nested list stored at path and flatten it with glom's flatten().

    A path that cannot be resolved is treated as an empty list.
    """
    return flatten(get(obj, path, default=[]))
def group_by(items: list[dict], key_path: str) -> dict[Any, list[dict]]:
    """
    Bucket the dicts in items by the value found at key_path.

    Items keep their original relative order inside each bucket. Items
    where key_path cannot be resolved are grouped under None.
    """
    buckets: dict[Any, list[dict]] = {}
    for record in items:
        bucket_key = get(record, key_path)
        if bucket_key not in buckets:
            buckets[bucket_key] = []
        buckets[bucket_key].append(record)
    return buckets
# ─────────────────────────────────────────────────────────────────────────────
# 4. Fill — template-based extraction
# ─────────────────────────────────────────────────────────────────────────────
def extract(obj: Any, template: dict[str, Any] | list[Any]) -> Any:
    """
    Produce a new dict or list by evaluating each template entry against obj.

    A dict template yields a dict with the same keys; any other template is
    iterated and yields a list. Entries may be dot-path strings or glom specs.

    Example:
        extract(user, {
            "id": "id",
            "email": "contact.email",
            "name": T["first"] + " " + T["last"],
        })
    """
    def _lookup(entry):
        # Strings go through get(), so a missing path yields None instead of
        # raising; every other spec is evaluated by glom directly.
        return get(obj, entry) if isinstance(entry, str) else glom(obj, entry)

    if not isinstance(template, dict):
        return [_lookup(entry) for entry in template]
    return {name: _lookup(entry) for name, entry in template.items()}
# ─────────────────────────────────────────────────────────────────────────────
# 5. Merge helpers
# ─────────────────────────────────────────────────────────────────────────────
def merge_dicts(*dicts: dict) -> dict:
    """
    Deep-merge multiple dicts into a new dict.

    Dicts are applied left to right, so later values win over earlier ones.
    When both sides hold a dict under the same key, the two are merged
    recursively instead of the later one replacing the earlier one wholesale.
    Calling with no arguments returns an empty dict.

    The input dicts are never mutated. Nested values that appear in only one
    input are not copied, so the result may share them with that input.
    """
    def _merge_into(dest: dict, src: dict) -> dict:
        for key, value in src.items():
            existing = dest.get(key)
            if isinstance(existing, dict) and isinstance(value, dict):
                # Copy before recursing: `existing` may still be an object
                # owned by one of the caller's input dicts.
                dest[key] = _merge_into(dict(existing), value)
            else:
                dest[key] = value
        return dest

    merged: dict = {}
    for layer in dicts:
        _merge_into(merged, layer)
    return merged
def merge_defaults(base: dict, defaults: dict) -> dict:
    """
    Fill in base with values from defaults, letting base win on conflicts.

    Implemented by merging defaults first and base second via merge_dicts().
    """
    layers = (defaults, base)
    return merge_dicts(*layers)
# ─────────────────────────────────────────────────────────────────────────────
# 6. Match / validation
# ─────────────────────────────────────────────────────────────────────────────
def validate(obj: Any, schema: Any) -> tuple[bool, str]:
    """
    Validate obj against a glom Match schema without raising.

    Returns (True, "") on success or (False, error_message), where
    error_message is the string form of the exception that was raised.

    Example:
        validate(user, {"name": str, "age": Check(type=int, validate=lambda v: v > 0)})
    """
    try:
        glom(obj, Match(schema))
    except Exception as exc:
        # Deliberately broad: this helper reports every failure through its
        # return value instead of propagating it to the caller.
        return False, str(exc)
    return True, ""
def must_match(obj: Any, schema: Any) -> Any:
    """
    Check obj against a glom Match schema and hand back the result.

    Unlike validate(), a failure is not caught here: the MatchError raised
    by glom propagates to the caller.
    """
    matcher = Match(schema)
    return glom(obj, matcher)
# ─────────────────────────────────────────────────────────────────────────────
# 7. API response helpers
# ─────────────────────────────────────────────────────────────────────────────
def unwrap_response(
    response: dict,
    data_path: str = "data",
    meta_paths: list[str] | None = None,
) -> dict:
    """
    Pull the payload, plus any requested metadata, out of a nested API response.

    The payload found at data_path is stored under "data". Each entry of
    meta_paths is stored under the last segment of its dot-path; entries
    sharing a last segment (or ending in "data") overwrite one another.
    Paths that cannot be resolved yield None.

    Example response: {"data": {"users": [...]}, "meta": {"total": 50}}
        unwrap_response(resp, "data.users", meta_paths=["meta.total"])
        → {"data": [...], "total": 50}
    """
    unwrapped = {"data": get(response, data_path)}
    if meta_paths:
        for meta_path in meta_paths:
            # Text after the final dot, or the whole path when there is no dot.
            leaf = meta_path.rpartition(".")[2]
            unwrapped[leaf] = get(response, meta_path)
    return unwrapped
def normalize_pagination(response: dict) -> dict:
    """
    Map differently shaped pagination payloads onto one common dict.

    Recognised shapes: {total, page, per_page}, {count, next, previous},
    and {meta.total}. Fallbacks when nothing resolves: total → None,
    page → 1, per_page → 20, next/previous → None.
    """
    total = coalesce(response, "total", "count", "meta.total", default=None)
    page = coalesce(response, "page", "meta.page", default=1)
    per_page = coalesce(response, "per_page", "limit", "meta.per_page", default=20)
    next_link = get(response, "next")
    previous_link = get(response, "previous")
    return {
        "total": total,
        "page": page,
        "per_page": per_page,
        "next": next_link,
        "previous": previous_link,
    }
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Walk-through of every helper group above; prints results to stdout.
    print("=== Basic access ===")
    data = {"user": {"profile": {"name": "Alice", "age": 30}, "roles": ["admin", "user"]}}
    print("name:", get(data, "user.profile.name"))
    print("age:", get(data, "user.profile.age"))
    print("missing:", get(data, "user.address.city", default="N/A"))

    print("\n=== Path (mixed keys) ===")
    data2 = {"orders": [{"id": 1, "total": 9.99}, {"id": 2, "total": 19.99}]}
    print("first total:", get_path(data2, "orders", 0, "total"))

    print("\n=== Coalesce ===")
    # "display_name" is absent, so the lookup falls through to "username".
    # A key that is present with the value None would be returned as-is:
    # Coalesce only skips lookups that fail, not None values.
    user = {"username": "bob"}
    print("label:", coalesce(user, "display_name", "username", default="Unknown"))

    print("\n=== Collect from list ===")
    catalog = {"products": [
        {"sku": "A1", "price": 4.99},
        {"sku": "B2", "price": 9.99},
        {"sku": "C3", "price": 14.99},
    ]}
    prices = collect(catalog, "products", "price")
    print("prices:", prices)

    print("\n=== Reshape ===")
    rows = [
        {"first_name": "Alice", "last_name": "Smith", "contact": {"email": "alice@example.com"}},
        {"first_name": "Bob", "last_name": "Jones", "contact": {"email": "bob@example.com"}},
    ]
    reshaped = reshape(rows, {"name": "first_name", "email": "contact.email"})
    for r in reshaped:
        print(f" {r}")

    print("\n=== Pluck (multi-path) ===")
    profile = {"id": 42, "profile": {"email": "alice@example.com"}, "settings": {"theme": "dark"}}
    extracted = pluck(profile, "id", "profile.email", "settings.theme")
    print(extracted)

    print("\n=== Assign (deep set) ===")
    config = {"database": {"host": "localhost", "port": 5432}}
    assign(config, "database.port", 5433)
    print("Updated port:", config["database"]["port"])

    print("\n=== Merge ===")
    defaults = {"debug": False, "workers": 4, "log_level": "INFO"}
    overrides = {"workers": 8, "log_level": "DEBUG"}
    merged = merge_defaults(overrides, defaults)
    print("Merged:", merged)

    print("\n=== Validate ===")
    # Check has no comparison keywords such as gt=; unknown keyword arguments
    # raise TypeError. The type and a validate callable express "int > 0".
    schema = {"name": str, "age": Check(type=int, validate=lambda age: age > 0)}
    ok, err = validate({"name": "Alice", "age": 30}, schema)
    print("Valid:", ok)
    ok2, err2 = validate({"name": "Bob", "age": -1}, schema)
    print("Invalid:", ok2, err2[:60])

    print("\n=== API response unwrap ===")
    api_resp = {"data": {"users": [{"id": 1}, {"id": 2}]}, "meta": {"total": 2}}
    result = unwrap_response(api_resp, "data.users", meta_paths=["meta.total"])
    print(result)
For the jmespath alternative — jmespath implements the JMESPath query language for JSON, which is powerful for filtering and projecting JSON arrays; glom is Python-native (works on any object, not just JSON-serializable dicts), supports assignment, method chaining via T, Match validation, and Coalesce fallback in a single library. For the jsonpath-ng alternative — jsonpath-ng implements RFC JSONPath and is good for standards-compliant JSON querying; glom is Pythonic and integrates tightly with dataclasses, Pydantic models, and arbitrary Python objects in ways that JSONPath doesn’t, making it better for internal data pipelines. The Claude Skills 360 bundle includes glom skill sets covering glom() dot-path access, get()/get_path() helpers, coalesce() fallback chains, pluck() multi-path extraction, assign()/assign_many() deep mutation, collect()/transform_list() nested list ops, reshape() key remapping, flatten_nested(), group_by(), extract() template-based extraction, merge_dicts()/merge_defaults(), validate()/must_match() schema checking, unwrap_response() and normalize_pagination() API helpers. Start with the free tier to try nested data transformation code generation.