deepdiff compares nested Python objects — dicts, lists, sets, and custom classes. pip install deepdiff. Basic: from deepdiff import DeepDiff; diff = DeepDiff(t1, t2). Result keys: type_changes, values_changed, dictionary_item_added, dictionary_item_removed, iterable_item_added, iterable_item_removed, set_item_added, set_item_removed, attribute_added, attribute_removed. Ignore order: DeepDiff(t1, t2, ignore_order=True). Floats: DeepDiff(t1, t2, significant_digits=2). Exclude: DeepDiff(t1, t2, exclude_paths=["root['ts']"]). Regex exclude: exclude_regex_paths=[r"root\['\w+_at'\]"]. Verbose output (includes old/new values for added and removed items): DeepDiff(t1, t2, verbose_level=2). Text: diff.to_json(). Delta: from deepdiff import Delta; delta = Delta(diff, bidirectional=True); t1 + delta == t2. Reverse (requires bidirectional=True): t2 - delta == t1. DeepHash: from deepdiff import DeepHash; dh = DeepHash(obj); dh[obj]. Grep: from deepdiff import grep; result = t1 | grep("needle"). Custom: DeepDiff(t1, t2, custom_operators=[IsEmptyOperator()]). Report: diff.pretty(). Tree view: diff.tree. Claude Code generates deepdiff comparison helpers, test assertion utilities, and change-detection pipelines.
CLAUDE.md for deepdiff
## deepdiff Stack
- Version: deepdiff >= 6.7 | pip install deepdiff
- Compare: DeepDiff(t1, t2) → diff dict with change-type keys
- Ignore order: DeepDiff(t1, t2, ignore_order=True) — treat lists as sets
- Exclude: exclude_paths=["root['key']"] | exclude_regex_paths=[r"root\['ts'\]"]
- Floats: significant_digits=2 — tolerance-based float comparison
- Delta: Delta(diff, bidirectional=True) | t1 + delta → t2 | t2 - delta → t1 (subtraction requires bidirectional=True)
- DeepHash: DeepHash(obj)[obj] — stable content hash for any object
deepdiff Comparison Pipeline
# app/diff_utils.py — deepdiff comparison, delta, deephash, and test helpers
from __future__ import annotations
import json
import re
from typing import Any
from deepdiff import DeepDiff, DeepHash, Delta, grep
# ─────────────────────────────────────────────────────────────────────────────
# 1. Core comparison helpers
# ─────────────────────────────────────────────────────────────────────────────
def diff(
    t1: Any,
    t2: Any,
    ignore_order: bool = False,
    significant_digits: int | None = None,
    exclude_paths: list[str] | None = None,
    exclude_regex_paths: list[str] | None = None,
    ignore_string_case: bool = False,
    verbose_level: int = 1,
) -> DeepDiff:
    """
    Run DeepDiff on t1 and t2 and return its result object.

    ignore_order and verbose_level are always forwarded. The remaining
    options are forwarded only when the caller actually set them
    (significant_digits is not None; exclude_paths / exclude_regex_paths
    are non-empty; ignore_string_case is truthy), so DeepDiff's own
    defaults apply otherwise.

    The result is falsy when no differences were found.
    """
    options: dict[str, Any] = dict(
        ignore_order=ignore_order,
        verbose_level=verbose_level,
    )
    # (keyword, value to forward, whether the caller opted in)
    optional = (
        ("significant_digits", significant_digits, significant_digits is not None),
        ("exclude_paths", exclude_paths, bool(exclude_paths)),
        ("exclude_regex_paths", exclude_regex_paths, bool(exclude_regex_paths)),
        ("ignore_string_case", True, bool(ignore_string_case)),
    )
    for keyword, value, wanted in optional:
        if wanted:
            options[keyword] = value
    return DeepDiff(t1, t2, **options)
def is_equal(
    t1: Any,
    t2: Any,
    ignore_order: bool = False,
    significant_digits: int | None = None,
) -> bool:
    """
    Report whether diff() finds no differences between t1 and t2.

    ignore_order and significant_digits are passed straight through to
    diff(); an empty (falsy) diff result means the objects are deeply equal.
    """
    differences = diff(
        t1,
        t2,
        ignore_order=ignore_order,
        significant_digits=significant_digits,
    )
    return not differences
def changed_paths(d: DeepDiff) -> list[str]:
    """
    Return a sorted, flat list of every path mentioned in a diff result.

    Each value in the result is inspected by shape:
      * mapping (path -> detail)      -> its keys are collected
      * any other non-string iterable -> each element is collected via str()
      * anything else (e.g. a scalar) -> ignored

    The second case deliberately accepts any iterable rather than only the
    builtin ``set``: deepdiff reports added/removed items in ordered-set
    containers that are not guaranteed to subclass ``set``, and an
    ``isinstance(..., set)`` check silently dropped those paths.
    """
    # Local import keeps this helper self-contained without touching the
    # module's import block.
    from collections.abc import Iterable, Mapping

    paths: list[str] = []
    for changes in d.values():
        if isinstance(changes, Mapping):
            paths.extend(str(path) for path in changes)
        elif isinstance(changes, Iterable) and not isinstance(changes, (str, bytes)):
            paths.extend(str(path) for path in changes)
        # Non-iterable values carry no paths and are skipped.
    return sorted(paths)
def summary(d: DeepDiff) -> dict[str, int]:
    """
    Map each change type in the diff to the number of entries it holds.

    Every value in the result must support len().
    """
    counts: dict[str, int] = {}
    for change_type, entries in d.items():
        counts[change_type] = len(entries)
    return counts
def to_json(d: DeepDiff, indent: int = 2) -> str:
    """
    Render a diff result as a JSON string.

    Delegates to the result's own to_json(), forwarding indent.
    """
    serialized = d.to_json(indent=indent)
    return serialized
def from_json(json_str: str) -> DeepDiff | dict[str, Any]:
    """
    Load a diff that was serialized with to_json().

    If the installed deepdiff exposes a ``DeepDiff.from_json`` classmethod
    it is used. Otherwise the JSON is decoded with the standard library and
    returned as a plain dict keyed by change type, instead of failing with
    AttributeError.

    NOTE(review): deepdiff documents to_json() as a one-way export and
    pairs to_json_pickle() with from_json_pickle() for lossless round
    trips — confirm against the pinned version and switch to that pair if
    callers need a real DeepDiff instance back.
    """
    loader = getattr(DeepDiff, "from_json", None)
    if callable(loader):
        return loader(json_str)
    return json.loads(json_str)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Delta — apply and reverse diffs
# ─────────────────────────────────────────────────────────────────────────────
def make_delta(d: DeepDiff, bidirectional: bool = True) -> Delta:
    """
    Create a Delta from a DeepDiff result.

    The Delta is built with bidirectional=True by default so that it
    supports both + (apply forward) and - (apply backward); deepdiff
    refuses subtraction on a Delta that was not created bidirectional.
    Pass bidirectional=False for a forward-only Delta.

    NOTE(review): deepdiff may reject diffs produced with ignore_order=True
    unless report_repetition=True was also set — confirm before building
    deltas from order-insensitive diffs.

    Example:
        d = diff(old, new)
        delt = make_delta(d)
        assert old + delt == new
        assert new - delt == old
    """
    return Delta(d, bidirectional=bidirectional)
def apply_delta(obj: Any, d: DeepDiff) -> Any:
    """
    Apply a diff forward to obj.

    Builds a Delta via make_delta(d) and returns obj + delta.
    """
    forward = make_delta(d)
    return obj + forward
def revert_delta(obj: Any, d: DeepDiff) -> Any:
    """
    Apply a diff backward — revert obj to the first value of the diff.

    The Delta is built with bidirectional=True right here because deepdiff
    only allows subtraction on a bidirectional Delta; a default Delta raises
    when used with the - operator.
    """
    backward_capable = Delta(d, bidirectional=True)
    return obj - backward_capable
def patch(original: Any, serialized_delta: str) -> Any:
    """
    Apply a JSON-serialized delta to original and return the result.

    serialized_delta is the string produced by
    ``Delta(diff, serializer=json.dumps).dumps()``. It is handed to Delta
    together with ``deserializer=json.loads`` so deepdiff does the decoding.
    Delta has no ``delta_dict`` keyword, so passing a pre-parsed dict under
    that name raised TypeError.

    Useful for transmitting diffs over a network.
    """
    delt = Delta(serialized_delta, deserializer=json.loads)
    return original + delt
# ─────────────────────────────────────────────────────────────────────────────
# 3. DeepHash — stable content hashing
# ─────────────────────────────────────────────────────────────────────────────
def content_hash(obj: Any, ignore_order: bool = True) -> str:
    """
    Compute a content hash for obj using DeepHash.

    Useful for content-addressable storage, cache keys, and deduplication.

    ignore_order is forwarded as DeepHash's ``ignore_iterable_order``
    option: when True, iterables with the same items in a different order
    hash identically. DeepHash rejects unknown keyword arguments, so the
    option must be passed under that name rather than ``ignore_order``.

    Returns the hash DeepHash stored for obj itself (``dh[obj]``).
    """
    dh = DeepHash(obj, ignore_iterable_order=ignore_order)
    return dh[obj]
def objects_have_same_content(
    a: Any,
    b: Any,
    ignore_order: bool = True,
) -> bool:
    """
    Compare a and b by their content_hash() values.

    Both hashes are computed with the same ignore_order setting; the result
    is True when they are equal.
    """
    hash_a = content_hash(a, ignore_order)
    hash_b = content_hash(b, ignore_order)
    return hash_a == hash_b
def deduplicate(items: list[Any], ignore_order: bool = True) -> list[Any]:
    """
    Drop items whose content_hash() has already been seen.

    The first item for each distinct hash is kept, in original order.
    """
    # dict preserves insertion order, and setdefault keeps the first item
    # stored under each hash.
    first_by_hash: dict[str, Any] = {}
    for item in items:
        first_by_hash.setdefault(content_hash(item, ignore_order), item)
    return list(first_by_hash.values())
# ─────────────────────────────────────────────────────────────────────────────
# 4. Grep — search inside nested structures
# ─────────────────────────────────────────────────────────────────────────────
def search(obj: Any, needle: Any, verbose_level: int = 2) -> dict:
    """
    Look for needle inside obj using deepdiff's grep.

    Evaluates ``obj | grep(needle, verbose_level=verbose_level)`` and
    returns the result. Callers in this module read the 'matched_values'
    entry from it.

    Example:
        result = search(data, "admin")
        print(result["matched_values"])
    """
    matcher = grep(needle, verbose_level=verbose_level)
    return obj | matcher
def find_paths(obj: Any, needle: Any) -> list[str]:
    """
    List the paths under which search() reports needle as a value.

    Reads the 'matched_values' entry of the search result (empty when
    absent). A mapping-like entry contributes its keys; any other iterable
    contributes str() of each element.
    """
    matched = search(obj, needle).get("matched_values", {})
    if not hasattr(matched, "keys"):
        return [str(entry) for entry in matched]
    return list(matched.keys())
# ─────────────────────────────────────────────────────────────────────────────
# 5. Config / record diff helpers
# ─────────────────────────────────────────────────────────────────────────────
# Matches a bare key name that looks like a timestamp field.
_TS_PATTERN = re.compile(r"_at$|_ts$|timestamp|created|updated")

# Path-level counterpart of _TS_PATTERN: matches one ['key'] segment of a
# deepdiff path whose key ends in _at/_ts or contains
# timestamp/created/updated. It is written out explicitly because splicing
# _TS_PATTERN.pattern between "root['" and "']" breaks alternation
# precedence and leaves the `$` anchors in the middle of the path, so keys
# such as 'updated_at' or 'last_ts' were never excluded.
_TS_PATH_REGEX = (
    r"\['"
    r"(?:[^']*(?:_at|_ts)"
    r"|[^']*(?:timestamp|created|updated)[^']*)"
    r"'\]"
)


def diff_configs(old: dict, new: dict) -> DeepDiff:
    """
    Compare two config dicts, ignoring timestamp-like keys.

    A key is timestamp-like when it ends in ``_at`` or ``_ts`` or contains
    ``timestamp``, ``created`` or ``updated``. The exclusion is expressed as
    a regex over deepdiff path segments, so it applies to such keys at any
    nesting depth. Only single-quoted string-key segments (``['key']``) are
    recognised.
    """
    return diff(
        old,
        new,
        exclude_regex_paths=[_TS_PATH_REGEX],
    )
def diff_records(
    old: list[dict],
    new: list[dict],
    key: str = "id",
) -> dict[str, list[dict]]:
    """
    Compare two lists of dicts (records) matched up by the value of `key`.

    Returns {'added': [...], 'removed': [...], 'changed': [...]} where
      * added   — records whose key value appears only in `new`
      * removed — records whose key value appears only in `old`
      * changed — one {"id": <key value>, "diff": <diff result>} entry per
                  record present in both lists with a non-empty diff

    Records that lack `key` are skipped. If several records in one list
    share a key value, the last one wins. The label in each 'changed' entry
    is always "id", whatever `key` is.
    """
    old_map = {r[key]: r for r in old if key in r}
    new_map = {r[key]: r for r in new if key in r}

    added = [rec for k, rec in new_map.items() if k not in old_map]
    removed = [rec for k, rec in old_map.items() if k not in new_map]

    changed: list[dict] = []
    for k, old_rec in old_map.items():
        if k not in new_map:
            continue
        # Diff each shared record once and reuse the result, instead of
        # computing it for the filter and again for the output.
        record_diff = diff(old_rec, new_map[k])
        if record_diff:
            changed.append({"id": k, "diff": record_diff})

    return {"added": added, "removed": removed, "changed": changed}
# ─────────────────────────────────────────────────────────────────────────────
# 6. Test assertion helpers
# ─────────────────────────────────────────────────────────────────────────────
class DiffAssertions:
    """
    Test-class mixin offering diff-based assertions.

    Provides assert_deep_equal (objects must have an empty diff) and
    assert_changed (the set of changed paths must match exactly).

    Usage (pytest / unittest):
        class TestMyService(DiffAssertions):
            def test_update(self):
                self.assert_deep_equal(result, expected, ignore_order=True)
    """

    def assert_deep_equal(
        self,
        actual: Any,
        expected: Any,
        ignore_order: bool = False,
        significant_digits: int | None = None,
        exclude_paths: list[str] | None = None,
        msg: str | None = None,
    ) -> None:
        """
        Raise AssertionError unless diff(expected, actual, ...) is empty.

        The error text is `msg` when given, otherwise a pretty-printed diff.
        """
        # expected is passed first so it is the "old" side of the diff.
        differences = diff(
            expected,
            actual,
            ignore_order=ignore_order,
            significant_digits=significant_digits,
            exclude_paths=exclude_paths,
        )
        if not differences:
            return
        raise AssertionError(msg or f"Objects differ:\n{differences.pretty()}")

    def assert_changed(
        self,
        before: Any,
        after: Any,
        expected_changes: list[str],
    ) -> None:
        """
        Raise AssertionError unless the changed paths equal expected_changes.

        Paths are taken from changed_paths(diff(before, after)) and compared
        as sets, so ordering and duplicates in expected_changes are ignored.
        """
        d = diff(before, after)
        observed = set(changed_paths(d))
        wanted = set(expected_changes)
        missing = wanted - observed
        extra = observed - wanted
        if not (missing or extra):
            return
        raise AssertionError(
            f"Change mismatch.\nMissing: {missing}\nUnexpected: {extra}\nDiff: {d.pretty()}"
        )
def assert_deep_equal(
    actual: Any,
    expected: Any,
    ignore_order: bool = False,
    significant_digits: int | None = None,
    exclude_paths: list[str] | None = None,
    msg: str | None = None,
) -> None:
    """
    Standalone assert — raises AssertionError when actual and expected differ.

    Delegates to DiffAssertions.assert_deep_equal with the same options.
    `msg` mirrors the mixin method's parameter: when given it replaces the
    default pretty-printed diff as the error text.
    """
    DiffAssertions().assert_deep_equal(
        actual,
        expected,
        ignore_order=ignore_order,
        significant_digits=significant_digits,
        exclude_paths=exclude_paths,
        msg=msg,
    )
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Walk through each helper group with small literal inputs.
    print("=== Basic diff ===")
    t1 = {"name": "Alice", "age": 30, "scores": [10, 20, 30], "city": "NYC"}
    t2 = {"name": "Alice", "age": 31, "scores": [10, 25, 30], "country": "US"}
    d = diff(t1, t2)
    print(d.pretty())
    print("Summary:", summary(d))

    print("\n=== Ignore order ===")
    a = {"tags": ["python", "api", "fast"]}
    b = {"tags": ["fast", "python", "api"]}
    print("With order:  ", bool(diff(a, b)))
    print("Ignore order: ", bool(diff(a, b, ignore_order=True)))

    print("\n=== Float tolerance ===")
    x = {"pi": 3.14159}
    y = {"pi": 3.14200}
    print("Exact:       ", bool(diff(x, y)))
    print("2 sig digits:", bool(diff(x, y, significant_digits=2)))

    print("\n=== Delta (apply + revert) ===")
    old = {"version": 1, "config": {"debug": False, "workers": 4}}
    new = {"version": 2, "config": {"debug": True, "workers": 8}}
    d = diff(old, new)
    restored = revert_delta(new, d)
    # Applying the delta to `old` should yield `new`; the label previously
    # said "→ old" even though the comparison is against `new`.
    print("Applied delta → new:", apply_delta(old, d) == new)
    print("Reverted delta → old:", restored == old)

    print("\n=== DeepHash deduplication ===")
    records = [
        {"id": 1, "val": "a"},
        {"id": 2, "val": "b"},
        {"id": 1, "val": "a"},  # duplicate
        {"id": 3, "val": "c"},
    ]
    unique = deduplicate(records)
    print(f"Original: {len(records)}, Unique: {len(unique)}")

    print("\n=== Grep / search ===")
    data = {"users": [{"role": "admin", "name": "Bob"}, {"role": "user", "name": "Alice"}]}
    paths = find_paths(data, "admin")
    print(f"'admin' found at: {paths}")

    print("\n=== Record diff ===")
    old_records = [
        {"id": 1, "name": "Alice", "age": 30},
        {"id": 2, "name": "Bob", "age": 25},
        {"id": 3, "name": "Carol", "age": 28},
    ]
    new_records = [
        {"id": 1, "name": "Alice", "age": 31},  # changed
        {"id": 3, "name": "Carol", "age": 28},  # unchanged
        {"id": 4, "name": "Dave", "age": 35},  # added
    ]
    changes = diff_records(old_records, new_records)
    print(f"  Added:   {[r['id'] for r in changes['added']]}")
    print(f"  Removed: {[r['id'] for r in changes['removed']]}")
    print(f"  Changed: {[c['id'] for c in changes['changed']]}")
For the jsondiff alternative — jsondiff is JSON-specific and works well for simple JSON string diffs; deepdiff handles arbitrary Python objects (dataclasses, custom classes, sets, tuples, numpy arrays) with richer change metadata, Delta for patching, and DeepHash for content-based hashing. For the dictdiffer alternative — dictdiffer is lightweight and produces patch-format output; deepdiff gives structured change types (values_changed, dictionary_item_added, type_changes), supports ignore_order and significant_digits for floats, and offers reversible Deltas — making it better suited for data pipelines and test assertions. The Claude Skills 360 bundle includes deepdiff skill sets covering DeepDiff() core comparison, ignore_order/significant_digits/exclude_paths options, changed_paths()/summary() helpers, to_json()/from_json() serialization, Delta make/apply/revert, content_hash() and deduplicate() via DeepHash, search()/find_paths() grep, diff_configs() with timestamp exclusion, diff_records() keyed record comparison, and DiffAssertions test mixin. Start with the free tier to try deep comparison code generation.