Python’s difflib module computes differences between sequences and generates human-readable diffs. import difflib. SequenceMatcher: sm = difflib.SequenceMatcher(None, a, b) — isjunk is the first arg (None = no element is treated as junk; the separate autojunk=True default still applies its popularity heuristic to sequences of 200+ items); .ratio() → float [0,1] similarity; .quick_ratio() and .real_quick_ratio() for fast upper bounds. get_matching_blocks: sm.get_matching_blocks() → list of Match(a, b, size) named tuples; last entry is always Match(len_a, len_b, 0). get_opcodes: sm.get_opcodes() → list of (tag, i1, i2, j1, j2) where tag is one of ‘equal’, ‘replace’, ‘insert’, ‘delete’. unified_diff: difflib.unified_diff(a, b, fromfile="", tofile="", lineterm="\n", n=3) → iterator of unified diff lines (the -/+/@@ @@ format). context_diff: same signature, produces !/+/- context format. ndiff: difflib.ndiff(a, b) → Differ-formatted lines with ? intra-line markers hinting character-level changes. HtmlDiff: difflib.HtmlDiff().make_file(a, b) → full HTML page; .make_table(a, b) → table fragment. get_close_matches: difflib.get_close_matches(word, possibilities, n=3, cutoff=0.6) → list of closest matches at or above cutoff. Differ: list(difflib.Differ().compare(a, b)) → verbose diff whose lines start with one of the two-character prefixes '- ', '+ ', '  ' (two spaces), or '? '. restore: difflib.restore(diff, 1) → lines of the original sequence a; restore(diff, 2) → lines of b. Claude Code generates config file diff reporters, spell-check fuzzy matchers, patch generators, and code review diff tools.
CLAUDE.md for difflib
## difflib Stack
- Stdlib: import difflib
- Ratio: difflib.SequenceMatcher(None, a, b).ratio()
- Opcodes: sm.get_opcodes() # ('equal'/'replace'/'insert'/'delete', ...)
- Patch: list(difflib.unified_diff(lines_a, lines_b, fromfile="a", tofile="b"))
- Fuzzy: difflib.get_close_matches(word, vocab, n=3, cutoff=0.6)
- HTML: difflib.HtmlDiff().make_table(lines_a, lines_b)
difflib Comparison and Patch Pipeline
# app/diffutil.py — similarity, fuzzy match, patch gen, change summary, HTML
from __future__ import annotations
import difflib
import io
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# ─────────────────────────────────────────────────────────────────────────────
# 1. Similarity scoring
# ─────────────────────────────────────────────────────────────────────────────
def similarity(a: str, b: str) -> float:
    """
    Return a similarity ratio in [0.0, 1.0] between strings a and b.

    1.0 = identical, 0.0 = no characters in common. The score is
    2 * M / T, where M is the number of matched characters and T is
    len(a) + len(b); two empty strings score 1.0.

    Example:
        similarity("kitten", "sitting")  # ~0.615
        similarity("hello", "hello")     # 1.0
    """
    # autojunk is switched off deliberately. With the default (True),
    # SequenceMatcher discards every element that makes up more than ~1% of
    # a sequence of 200+ items. For character strings that is almost every
    # letter, so long, nearly identical texts could score close to 0.0.
    matcher = difflib.SequenceMatcher(None, a, b, autojunk=False)
    return matcher.ratio()
def similarity_lines(a: list[str], b: list[str]) -> float:
    """
    Score how alike two lists of lines are, as a ratio in [0.0, 1.0].

    Lines are compared as whole items by difflib.SequenceMatcher with its
    default settings; two empty lists score 1.0.

    Example:
        similarity_lines(old_lines, new_lines)  # 1.0 when nothing changed
    """
    line_matcher = difflib.SequenceMatcher(None, a, b)
    score = line_matcher.ratio()
    return score
def most_similar(query: str, candidates: list[str], n: int = 5) -> list[tuple[str, float]]:
    """
    Rank candidates by similarity to query and keep the best n.

    Returns (candidate, score) pairs, highest score first. Candidates with
    equal scores keep their input order.

    Example:
        most_similar("colour", ["color", "colon", "collar", "dollar"])
        # approx. [('color', 0.909), ('colon', 0.727), ('collar', 0.667), ('dollar', 0.5)]
    """
    ranked = []
    for candidate in candidates:
        ranked.append((candidate, similarity(query, candidate)))
    # A reverse sort is stable, so ties stay in their original order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:n]
def fuzzy_match(word: str, possibilities: list[str], n: int = 3, cutoff: float = 0.6) -> list[str]:
    """
    Look up the closest spellings of word among possibilities.

    Thin wrapper over difflib.get_close_matches: at most n results, each
    scoring at least cutoff, best match first.

    Example:
        fuzzy_match("colour", ["color", "collar", "colon"])
        # ['color', 'colon', 'collar']
    """
    closest = difflib.get_close_matches(word, possibilities, n, cutoff)
    return closest
def approx_contains(query: str, corpus: list[str], cutoff: float = 0.8) -> bool:
    """
    Tell whether corpus holds at least one entry that is close to query.

    An entry counts as close when similarity(query, entry) >= cutoff.
    Scanning stops at the first hit; an empty corpus gives False.

    Example:
        approx_contains("colour", ["color", "shape"])  # True
    """
    for entry in corpus:
        if similarity(query, entry) >= cutoff:
            return True
    return False
# ─────────────────────────────────────────────────────────────────────────────
# 2. Opcode-level change analysis
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ChangeStats:
    """Tallies of equal / replaced / inserted / deleted items between a and b."""

    equal_chars: int      # items covered by 'equal' opcodes
    replaced_chars: int   # items covered by 'replace' opcodes
    inserted_chars: int   # items covered by 'insert' opcodes
    deleted_chars: int    # items covered by 'delete' opcodes
    total_a: int          # length of the first input
    total_b: int          # length of the second input

    @property
    def edit_distance_approx(self) -> int:
        """Sum of the replaced, inserted, and deleted counts."""
        return sum((self.replaced_chars, self.inserted_chars, self.deleted_chars))

    @property
    def change_ratio(self) -> float:
        """
        Edit count divided by the longer input length (0.0 if both are empty).

        The value is not clamped, so it can exceed 1.0 when deletions and
        insertions fall in different places.
        """
        longest = max(self.total_a, self.total_b)
        if not longest:
            return 0.0
        return self.edit_distance_approx / longest

    def summary(self) -> str:
        """Render all counters and the change ratio on a single line."""
        fields = (
            f"equal={self.equal_chars}",
            f"replace={self.replaced_chars}",
            f"insert={self.inserted_chars}",
            f"delete={self.deleted_chars}",
            f"change_ratio={self.change_ratio:.2%}",
        )
        return " ".join(fields)
def char_change_stats(a: str, b: str) -> ChangeStats:
    """
    Summarise how string a turns into string b, character by character.

    Walks the SequenceMatcher opcodes and accumulates the length of every
    span per tag. A 'replace' span counts as the longer of its two sides.

    Example:
        stats = char_change_stats("Hello World", "Hello Python")
        print(stats.summary())
    """
    tally = {"equal": 0, "replace": 0, "insert": 0, "delete": 0}
    matcher = difflib.SequenceMatcher(None, a, b)
    for tag, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
        len_in_a = a_hi - a_lo
        len_in_b = b_hi - b_lo
        if tag == "insert":
            tally[tag] += len_in_b
        elif tag == "replace":
            tally[tag] += max(len_in_a, len_in_b)
        elif tag in tally:
            # 'equal' and 'delete' are both measured on the a side.
            tally[tag] += len_in_a
    return ChangeStats(
        equal_chars=tally["equal"],
        replaced_chars=tally["replace"],
        inserted_chars=tally["insert"],
        deleted_chars=tally["delete"],
        total_a=len(a),
        total_b=len(b),
    )
def line_change_summary(a: list[str], b: list[str]) -> dict[str, int]:
    """
    Tally added, removed, changed, and unchanged lines between a and b.

    A replaced span counts as 'changed' using the longer of its two sides.

    Example:
        summary = line_change_summary(old.splitlines(), new.splitlines())
        print(summary)  # {'added': 3, 'removed': 1, 'changed': 2, 'equal': 20}
    """
    counts = {"added": 0, "removed": 0, "changed": 0, "equal": 0}
    matcher = difflib.SequenceMatcher(None, a, b)
    for opcode in matcher.get_opcodes():
        tag, a_lo, a_hi, b_lo, b_hi = opcode
        old_span = a_hi - a_lo
        new_span = b_hi - b_lo
        # Map each opcode tag to the bucket it feeds and the amount to add.
        contribution = {
            "equal": ("equal", old_span),
            "replace": ("changed", max(old_span, new_span)),
            "insert": ("added", new_span),
            "delete": ("removed", old_span),
        }
        if tag in contribution:
            bucket, amount = contribution[tag]
            counts[bucket] += amount
    return counts
# ─────────────────────────────────────────────────────────────────────────────
# 3. Diff generation
# ─────────────────────────────────────────────────────────────────────────────
def unified_diff(
    a: str | list[str],
    b: str | list[str],
    fromfile: str = "old",
    tofile: str = "new",
    context: int = 3,
) -> str:
    """
    Generate a unified diff patch string.

    a and b may each be a whole text (split into lines here, keeping line
    endings) or a list of lines that already carry their line endings.
    fromfile / tofile label the two sides in the header; context is the
    number of unchanged lines shown around each change. Returns "" when
    the inputs are identical.

    A line without a terminator (typically the last line of a text that
    does not end in a newline) is terminated in the output and followed by
    the conventional "\\ No newline at end of file" marker. Without this,
    the line would run into the next diff line.

    Example:
        patch = unified_diff(old_source, new_source, "main.py", "main.py")
        print(patch)
    """
    lines_a = a.splitlines(keepends=True) if isinstance(a, str) else a
    lines_b = b.splitlines(keepends=True) if isinstance(b, str) else b
    pieces = []
    for line in difflib.unified_diff(lines_a, lines_b, fromfile=fromfile, tofile=tofile, n=context):
        pieces.append(line)
        # difflib copies input lines verbatim, so an unterminated input line
        # yields an unterminated diff line; close it and flag it.
        if not line.endswith(("\n", "\r")):
            pieces.append("\n\\ No newline at end of file\n")
    return "".join(pieces)
def context_diff_str(
    a: str | list[str],
    b: str | list[str],
    fromfile: str = "old",
    tofile: str = "new",
    context: int = 3,
) -> str:
    """
    Generate a context diff patch string.

    a and b may each be a whole text (split into lines here, keeping line
    endings) or a list of lines that already carry their line endings.
    fromfile / tofile label the two sides in the header; context is the
    number of unchanged lines shown around each change. Returns "" when
    the inputs are identical.

    A line without a terminator (typically the last line of a text that
    does not end in a newline) is terminated in the output and followed by
    the conventional "\\ No newline at end of file" marker. Without this,
    the line would run into the next diff line.
    """
    lines_a = a.splitlines(keepends=True) if isinstance(a, str) else a
    lines_b = b.splitlines(keepends=True) if isinstance(b, str) else b
    pieces = []
    for line in difflib.context_diff(lines_a, lines_b, fromfile=fromfile, tofile=tofile, n=context):
        pieces.append(line)
        # difflib copies input lines verbatim, so an unterminated input line
        # yields an unterminated diff line; close it and flag it.
        if not line.endswith(("\n", "\r")):
            pieces.append("\n\\ No newline at end of file\n")
    return "".join(pieces)
def ndiff_str(a: str | list[str], b: str | list[str]) -> str:
    """
    Generate an ndiff output with inline change hints (? lines).

    a and b may each be a whole text (split into lines here, keeping line
    endings) or a list of lines. Every line of the result ends with a line
    terminator: an input line that lacks one (for example the last line of
    a text without a trailing newline) gets "\\n" appended in the output so
    it cannot run into the next diff line.

    Example:
        print(ndiff_str("colour\\n", "color\\n"))
    """
    lines_a = a.splitlines(keepends=True) if isinstance(a, str) else a
    lines_b = b.splitlines(keepends=True) if isinstance(b, str) else b
    pieces = []
    for line in difflib.ndiff(lines_a, lines_b):
        # ndiff copies input lines verbatim; close any that are unterminated.
        pieces.append(line if line.endswith(("\n", "\r")) else line + "\n")
    return "".join(pieces)
def file_diff(path_a: str | Path, path_b: str | Path, context: int = 3) -> str:
    """
    Build a unified diff of two UTF-8 text files on disk.

    The paths, converted with str(), become the from/to labels in the diff
    header; context is forwarded to unified_diff unchanged.

    Example:
        patch = file_diff("config.old.yaml", "config.yaml")
        print(patch)
    """
    old_text, new_text = (
        Path(location).read_text(encoding="utf-8") for location in (path_a, path_b)
    )
    return unified_diff(
        old_text,
        new_text,
        fromfile=str(path_a),
        tofile=str(path_b),
        context=context,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 4. HTML diff
# ─────────────────────────────────────────────────────────────────────────────
def html_diff_table(
    a: str | list[str],
    b: str | list[str],
    fromdesc: str = "Old",
    todesc: str = "New",
    context: bool = True,
    numlines: int = 5,
) -> str:
    """
    Render a side-by-side diff as an HTML <table> fragment.

    Text inputs are split into lines; list inputs have trailing "\\n"
    characters removed from each line. fromdesc / todesc are the column
    headings; context and numlines are passed to HtmlDiff.make_table.

    Example:
        html = html_diff_table(old_text, new_text)
        Path("diff.html").write_text(f"<html><body>{html}</body></html>")
    """
    def _bare_lines(source):
        # Normalise either input form to newline-free lines.
        if isinstance(source, str):
            return source.splitlines()
        return [line.rstrip("\n") for line in source]

    left = _bare_lines(a)
    right = _bare_lines(b)
    renderer = difflib.HtmlDiff()
    return renderer.make_table(
        left,
        right,
        fromdesc=fromdesc,
        todesc=todesc,
        context=context,
        numlines=numlines,
    )
def html_diff_page(
    a: str | list[str],
    b: str | list[str],
    fromdesc: str = "Old",
    todesc: str = "New",
) -> str:
    """
    Render a side-by-side diff as a complete, standalone HTML document.

    Text inputs are split into lines; list inputs have trailing "\\n"
    characters removed from each line. fromdesc / todesc are the column
    headings.
    """
    def _bare_lines(source):
        # Normalise either input form to newline-free lines.
        if isinstance(source, str):
            return source.splitlines()
        return [line.rstrip("\n") for line in source]

    left = _bare_lines(a)
    right = _bare_lines(b)
    renderer = difflib.HtmlDiff()
    return renderer.make_file(left, right, fromdesc=fromdesc, todesc=todesc)
# ─────────────────────────────────────────────────────────────────────────────
# 5. Config / dict diff
# ─────────────────────────────────────────────────────────────────────────────
def dict_diff(old: dict[str, Any], new: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """
    Return a structured diff of two flat dicts.

    The result has three keys:
      'added'   - entries present only in new (key -> new value)
      'removed' - entries present only in old (key -> old value)
      'changed' - keys present in both whose values compare unequal,
                  each mapped to {'old': ..., 'new': ...}

    Entries keep the insertion order of the dict they come from ('added'
    follows new; 'removed' and 'changed' follow old), so the result is
    reproducible between runs rather than depending on set iteration
    order.

    Example:
        d = dict_diff({"a": 1, "b": 2}, {"b": 3, "c": 4})
        # {'added': {'c': 4}, 'removed': {'a': 1}, 'changed': {'b': {'old': 2, 'new': 3}}}
    """
    added = {key: value for key, value in new.items() if key not in old}
    removed = {key: value for key, value in old.items() if key not in new}
    changed = {
        key: {"old": value, "new": new[key]}
        for key, value in old.items()
        if key in new and value != new[key]
    }
    return {"added": added, "removed": removed, "changed": changed}
def format_dict_diff(diff: dict[str, dict[str, Any]]) -> str:
    """
    Turn a dict_diff result into readable text, one entry per line.

    Additions come first ('+'), then removals ('-'), then changes ('~');
    each group is sorted by key. Returns "(no changes)" when all three
    groups are empty.

    Example:
        d = dict_diff(old_config, new_config)
        print(format_dict_diff(d))
    """
    rendered = [f"+ {name} = {value!r}" for name, value in sorted(diff["added"].items())]
    rendered += [f"- {name} = {value!r}" for name, value in sorted(diff["removed"].items())]
    rendered += [
        f"~ {name}: {delta['old']!r} → {delta['new']!r}"
        for name, delta in sorted(diff["changed"].items())
    ]
    if not rendered:
        return "(no changes)"
    return "\n".join(rendered)
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Walk through every helper in the module with small sample inputs.
    print("=== difflib demo ===")

    print("\n--- similarity ---")
    word_pairs = (
        ("kitten", "sitting"),
        ("colour", "color"),
        ("hello", "hello"),
        ("Python", "Jython"),
    )
    for left, right in word_pairs:
        print(f" similarity({left!r}, {right!r}) = {similarity(left, right):.3f}")

    print("\n--- fuzzy_match ---")
    vocab = ["python", "pycharm", "pypy", "cython", "jython", "pytest", "pathlib"]
    for probe in ("pythn", "cython"):
        print(f" fuzzy_match({probe!r}, vocab) = {fuzzy_match(probe, vocab)}")

    print("\n--- most_similar ---")
    ranking = most_similar("colour", ["color", "col", "colon", "collar", "dollar"])
    for candidate, score in ranking[:3]:
        print(f" {candidate}: {score:.3f}")

    print("\n--- char_change_stats ---")
    char_stats = char_change_stats("Hello World", "Hello Python World")
    print(f" {char_stats.summary()}")

    print("\n--- line_change_summary ---")
    old_text = "line1\nline2\nline3\nline4\nline5"
    new_text = "line1\nline2 modified\nline3\nnew_line\nline5"
    line_counts = line_change_summary(old_text.splitlines(), new_text.splitlines())
    print(f" {line_counts}")

    print("\n--- unified_diff ---")
    patch_text = unified_diff(old_text, new_text, "old.txt", "new.txt")
    # Show only the head of the patch to keep the demo output short.
    for patch_line in patch_text.splitlines()[:12]:
        print(f" {patch_line}")

    print("\n--- ndiff_str ---")
    british = "colour\n"
    american = "color\n"
    print(ndiff_str(british, american), end="")

    print("\n--- dict_diff ---")
    old_cfg = {"host": "localhost", "port": 5432, "debug": True, "timeout": 30}
    new_cfg = {"host": "prod.db", "port": 5432, "debug": False, "max_conn": 100}
    print(format_dict_diff(dict_diff(old_cfg, new_cfg)))

    print("\n--- html_diff_table (first 200 chars) ---")
    table_html = html_diff_table(old_text, new_text)
    print(f" {table_html[:200]}...")

    print("\n=== done ===")
For the patch / unidiff alternative — the unidiff PyPI library provides a full parser for unified diff files, letting you programmatically inspect changed files, hunks, and lines from existing .patch files; stdlib difflib generates diffs but cannot parse pre-existing patches — use unidiff when you need to read, validate, or apply existing patch files in a build pipeline; use difflib when you need to generate diffs programmatically from Python strings or lists and do not have patches on disk. For the Levenshtein / rapidfuzz alternative — python-Levenshtein and rapidfuzz (both PyPI) compute edit distance and similarity in C extensions, running 10–100× faster than difflib.SequenceMatcher for large-scale fuzzy matching; rapidfuzz also adds partial-ratio, token-sort, and set-ratio scorers — use rapidfuzz for production spell-checkers, record deduplication, and fuzzy search over large corpora; use difflib.get_close_matches() and SequenceMatcher for small-scale fuzzy matching in CLI tools and scripts where no PyPI dependency is acceptable. The Claude Skills 360 bundle includes difflib skill sets covering similarity()/similarity_lines()/most_similar()/fuzzy_match()/approx_contains() scoring helpers, ChangeStats dataclass with char_change_stats()/line_change_summary(), unified_diff()/context_diff_str()/ndiff_str()/file_diff() patch generators, html_diff_table()/html_diff_page() HTML diff rendering, and dict_diff()/format_dict_diff() structured config comparison. Start with the free tier to try text comparison patterns and difflib pipeline code generation.