Python’s formatter module provides an abstract event-based framework for building document formatters — originally used by htmllib (removed in 3.0). import formatter. Core abstraction: formatter.AbstractFormatter(writer) — drives an AbstractWriter backend; formatter.AbstractWriter — output sink. Implementations: formatter.DumbWriter(file=None, maxcol=72) — simple line-wrapping writer that outputs to a file. Formatter methods: .add_literal_data(data) — emit verbatim text; .add_flowing_data(data) — add text that can be wrapped; .add_line_break() — emit a newline; .end_paragraph(blankline) — end the paragraph and add vertical spacing; .push_font((size, italic, bold, teletype)) / .pop_font() — font stack; .push_margin(margin) / .pop_margin() — indentation stack (index key); .push_style(style) / .pop_style() — style stack; .assert_line_data(flag=1) — ensure we’re mid-line. Writer methods: .new_font(font), .new_margin(margin, level), .send_paragraph(blankline), .send_flowing_data(data), .send_literal_data(data), .send_label_data(data), .send_hor_rule(), .send_line_break(). Null objects: formatter.NullFormatter, formatter.NullWriter. Note: formatter was deprecated in 3.4 and removed in 3.10 — implement directly with io.StringIO or textwrap; include a compatibility guard. Claude Code generates text layout engines, document renderers, markup-to-text converters, and structured text formatters.
CLAUDE.md for formatter
## formatter Stack
- Stdlib: import formatter (deprecated 3.4, removed 3.10 — guard with try/except)
- Use: w = formatter.DumbWriter(io.StringIO(), maxcol=80)
- f = formatter.AbstractFormatter(w)
- f.add_flowing_data("hello ")
- f.add_literal_data("world")
- f.add_line_break()
- text = w.file.getvalue()
- Modern: use textwrap, io.StringIO, or a custom writer class directly
formatter Document Layout Pipeline
# app/formatterutil.py — custom writer, html-to-text, layout engine, fallback
from __future__ import annotations
import io
import textwrap
from dataclasses import dataclass, field
from html.parser import HTMLParser
# Guard for Python 3.10+, where the deprecated formatter module was removed.
# _formatter is always bound (to None when unavailable) so that an accidental
# reference fails with a clear attribute error on None instead of a NameError.
try:
    import formatter as _formatter
except ImportError:
    _formatter = None
    _FORMATTER_AVAILABLE = False
else:
    _FORMATTER_AVAILABLE = True
# ─────────────────────────────────────────────────────────────────────────────
# 1. Custom writer (works with or without formatter module)
# ─────────────────────────────────────────────────────────────────────────────
class StringWriter:
"""
A simple text writer that accumulates output in a StringIO buffer.
Implements the AbstractWriter interface so it works with formatter.AbstractFormatter
AND can be used standalone.
Example:
w = StringWriter(maxcol=72)
w.send_flowing_data("Hello, ")
w.send_flowing_data("world!")
w.send_line_break()
print(w.getvalue())
"""
def __init__(self, maxcol: int = 72) -> None:
self._buf = io.StringIO()
self._maxcol = maxcol
self._col = 0
self._margin = 0
self._line_started = False
def new_alignment(self, align: str | None) -> None:
pass
def new_font(self, font: tuple | None) -> None:
pass
def new_margin(self, margin, level: int, margin_index) -> None:
self._margin = level * 2 # 2 spaces per indent level
def new_styles(self, styles: tuple) -> None:
pass
def send_paragraph(self, blankline: int) -> None:
if self._line_started:
self._buf.write("\n")
self._col = 0
self._line_started = False
for _ in range(blankline):
self._buf.write("\n")
def send_line_break(self) -> None:
self._buf.write("\n")
self._col = 0
self._line_started = False
def send_hor_rule(self, *args, **kwargs) -> None:
self._buf.write("\n" + "-" * self._maxcol + "\n")
self._col = 0
self._line_started = False
def send_label_data(self, data: str) -> None:
self.send_flowing_data(data)
def send_flowing_data(self, data: str) -> None:
"""Add flowing text, wrapping at maxcol."""
if not data:
return
words = data.split()
indent = " " * self._margin
for word in words:
if self._col == 0:
self._buf.write(indent)
self._col = self._margin
wlen = len(word)
if self._line_started and self._col + 1 + wlen > self._maxcol:
self._buf.write("\n" + indent)
self._col = self._margin
self._line_started = False
if self._line_started:
self._buf.write(" ")
self._col += 1
self._buf.write(word)
self._col += wlen
self._line_started = True
def send_literal_data(self, data: str) -> None:
"""Emit pre-formatted literal text verbatim."""
self._buf.write(data)
lines = data.split("\n")
if lines:
self._col = len(lines[-1])
self._line_started = bool(lines[-1])
def getvalue(self) -> str:
return self._buf.getvalue()
def reset(self) -> None:
self._buf = io.StringIO()
self._col = 0
self._line_started = False
# ─────────────────────────────────────────────────────────────────────────────
# 2. Layout operations (via formatter module or direct StringWriter)
# ─────────────────────────────────────────────────────────────────────────────
class DocumentFormatter:
    """
    High-level document formatter that works with or without the formatter module.

    Provides paragraph, heading, literal block, list, definition, and
    horizontal rule formatting on top of a StringWriter. When the stdlib
    ``formatter`` module is importable, paragraphs are routed through
    ``formatter.AbstractFormatter``; every other element is written to the
    StringWriter directly. All emitting methods return ``self`` so calls can be
    chained.

    Example:
        doc = DocumentFormatter(maxcol=72)
        doc.heading("Title", level=1)
        doc.paragraph("This is a paragraph with some text.")
        doc.bullet_list(["item one", "item two", "item three"])
        print(doc.getvalue())
    """

    def __init__(self, maxcol: int = 72) -> None:
        """Create a formatter whose output is wrapped at *maxcol* columns."""
        self._writer = StringWriter(maxcol=maxcol)
        self._maxcol = maxcol
        self._paragraph_pending = False
        if _FORMATTER_AVAILABLE:
            self._formatter = _formatter.AbstractFormatter(self._writer)
        else:
            self._formatter = None

    def _flush_paragraph(self) -> None:
        """Emit a deferred paragraph break, if one is pending."""
        if self._paragraph_pending:
            self._writer.send_paragraph(1)
            self._paragraph_pending = False

    def paragraph(self, text: str) -> "DocumentFormatter":
        """Emit a paragraph of flowing text followed by one blank line."""
        self._flush_paragraph()
        if self._formatter is not None:
            self._formatter.add_flowing_data(text)
            # AbstractFormatter ends paragraphs with end_paragraph(); it has
            # no add_paragraph() method.
            self._formatter.end_paragraph(1)
        else:
            self._writer.send_flowing_data(text)
            self._writer.send_paragraph(1)
        return self

    def heading(self, text: str, level: int = 1) -> "DocumentFormatter":
        """Emit a heading underlined with ``=`` (level 1) or ``-`` (other levels)."""
        self._flush_paragraph()
        self._writer.send_line_break()
        rule_char = "=" if level == 1 else "-"
        # The underline matches the heading length but never exceeds maxcol.
        underline = rule_char * min(len(text), self._maxcol)
        self._writer.send_literal_data(text + "\n" + underline + "\n")
        return self

    def literal(self, text: str) -> "DocumentFormatter":
        """Emit literal/preformatted text (indented 4 spaces)."""
        self._flush_paragraph()
        self._writer.send_line_break()
        indent = " " * 4
        for line in text.splitlines():
            self._writer.send_literal_data(indent + line + "\n")
        self._writer.send_line_break()
        return self

    def hr(self) -> "DocumentFormatter":
        """Emit a horizontal rule."""
        self._flush_paragraph()
        self._writer.send_hor_rule()
        return self

    def bullet_list(self, items: list[str]) -> "DocumentFormatter":
        """Emit an unordered bullet list, one wrapped entry per item."""
        self._flush_paragraph()
        prefix = " • "
        # Continuation lines hang under the item text, as in numbered_list().
        hanging = " " * len(prefix)
        for item in items:
            wrapped = textwrap.fill(item, width=self._maxcol - len(prefix),
                                    subsequent_indent=hanging)
            self._writer.send_literal_data(prefix + wrapped + "\n")
        self._writer.send_line_break()
        return self

    def numbered_list(self, items: list[str]) -> "DocumentFormatter":
        """Emit an ordered list numbered from 1, one wrapped entry per item."""
        self._flush_paragraph()
        for i, item in enumerate(items, 1):
            prefix = f"{i:2d}. "
            wrapped = textwrap.fill(item, width=self._maxcol - len(prefix),
                                    subsequent_indent=" " * len(prefix))
            self._writer.send_literal_data(prefix + wrapped + "\n")
        self._writer.send_line_break()
        return self

    def definition(self, term: str, definition: str) -> "DocumentFormatter":
        """Emit a definition list entry: the term, then its indented definition."""
        self._flush_paragraph()
        self._writer.send_literal_data(f"{term}:\n")
        wrapped = textwrap.fill(definition, width=self._maxcol - 4,
                                initial_indent=" ", subsequent_indent=" ")
        self._writer.send_literal_data(wrapped + "\n\n")
        return self

    def getvalue(self) -> str:
        """Return the text produced so far."""
        return self._writer.getvalue()

    def reset(self) -> None:
        """Discard all output and any pending paragraph break."""
        self._writer.reset()
        self._paragraph_pending = False
# ─────────────────────────────────────────────────────────────────────────────
# 3. HTML-to-text converter
# ─────────────────────────────────────────────────────────────────────────────
class HtmlToTextParser(HTMLParser):
    """
    Convert simple HTML to plain text using DocumentFormatter.

    Handles: <h1>-<h6>, <p>, <br>, <hr>, <ul>/<ol>/<li>, <pre>/<code>,
    <b>/<strong>/<em>/<i>, <a href>, block-level nesting.

    Inline text is buffered until a block boundary and then emitted with its
    whitespace collapsed. Content of <script>, <style> and <head> is dropped.
    Links with an href are rendered as ``[text]``; anchors without one are
    rendered as plain text. A <pre>/<code> block is closed only by the end tag
    matching the tag that opened it, so ``<pre><code>...</code></pre>`` yields
    a single literal block.

    Example:
        html = "<h1>Title</h1><p>Hello <b>world</b>!</p><ul><li>A</li><li>B</li></ul>"
        text = html_to_text(html, maxcol=72)
        print(text)
    """

    def __init__(self, maxcol: int = 72) -> None:
        """Create a parser that renders into a DocumentFormatter of width *maxcol*."""
        super().__init__(convert_charrefs=True)
        self._doc = DocumentFormatter(maxcol=maxcol)
        self._current_text: list[str] = []  # pending inline text fragments
        self._list_stack: list[tuple[str, int]] = []  # (type, counter)
        self._in_pre: bool = False
        self._pre_tag: str | None = None  # tag that opened the literal block
        self._pre_buf: list[str] = []
        self._skip_tags: set[str] = {"script", "style", "head"}
        self._skip_depth: int = 0
        # One entry per open <a>: True when it had an href and emitted "[".
        self._link_stack: list[bool] = []

    def handle_starttag(self, tag: str, attrs: list) -> None:
        """React to an opening tag (see the class docstring for the tag set)."""
        if tag in self._skip_tags:
            self._skip_depth += 1
            return
        if self._skip_depth:
            return
        if tag in ("h1", "h2", "h3", "h4", "h5", "h6", "p", "li"):
            # A new block starts: emit whatever inline text came before it.
            self._flush_text()
        elif tag == "br":
            self._flush_text()
            self._doc._writer.send_line_break()
        elif tag == "hr":
            self._flush_text()
            self._doc.hr()
        elif tag in ("ul", "ol"):
            self._flush_text()
            self._list_stack.append((tag, 0))
        elif tag in ("pre", "code"):
            # A <pre>/<code> nested inside an open literal block is ignored.
            if not self._in_pre:
                self._flush_text()
                self._in_pre = True
                self._pre_tag = tag
                self._pre_buf = []
        elif tag == "a":
            has_href = bool(dict(attrs).get("href"))
            self._link_stack.append(has_href)
            if has_href:
                self._current_text.append("[")
        # <b>/<strong>/<em>/<i> and unknown tags carry no markup in plain text.

    def handle_endtag(self, tag: str) -> None:
        """React to a closing tag, emitting the block it terminates."""
        if tag in self._skip_tags:
            self._skip_depth = max(0, self._skip_depth - 1)
            return
        if self._skip_depth:
            return
        if tag in ("h1", "h2"):
            text = self._take_text()
            if text:
                self._doc.heading(text, level=1 if tag == "h1" else 2)
        elif tag in ("h3", "h4", "h5", "h6"):
            text = self._take_text()
            if text:
                self._doc._writer.send_literal_data(f"### {text}\n\n")
        elif tag == "p":
            text = self._take_text()
            if text:
                self._doc.paragraph(text)
        elif tag == "li":
            text = self._take_text()
            ltype = self._list_stack[-1][0] if self._list_stack else "ul"
            if ltype == "ol":
                count = self._list_stack[-1][1] + 1
                self._list_stack[-1] = ("ol", count)
                self._doc._writer.send_literal_data(f" {count}. {text}\n")
            else:
                self._doc._writer.send_literal_data(f" • {text}\n")
        elif tag in ("ul", "ol"):
            self._flush_text()
            if self._list_stack:
                self._list_stack.pop()
            self._doc._writer.send_line_break()
        elif tag in ("pre", "code"):
            # Only the tag that opened the literal block may close it.
            if self._in_pre and tag == self._pre_tag:
                self._emit_pre()
        elif tag == "a":
            # Close the bracket only if the matching <a> opened one.
            if self._link_stack and self._link_stack.pop():
                self._current_text.append("]")

    def handle_data(self, data: str) -> None:
        """Buffer character data as literal or inline text; drop skipped content."""
        if self._skip_depth:
            return
        if self._in_pre:
            self._pre_buf.append(data)
        else:
            self._current_text.append(data)

    def _take_text(self) -> str:
        """Return the buffered inline text with whitespace collapsed, and clear it."""
        text = " ".join("".join(self._current_text).split())
        self._current_text = []
        return text

    def _emit_pre(self) -> None:
        """Emit the buffered literal block and leave literal mode."""
        self._in_pre = False
        self._pre_tag = None
        self._doc.literal("".join(self._pre_buf))
        self._pre_buf = []

    def _flush_text(self) -> None:
        """Emit buffered inline text as flowing data and clear the buffer."""
        text = self._take_text()
        if text:
            self._doc._writer.send_flowing_data(text + " ")

    def getvalue(self) -> str:
        """Flush pending text (and any unclosed literal block) and return the output."""
        self._flush_text()
        if self._in_pre and self._pre_buf:
            # The document ended inside <pre>/<code>: keep its content.
            self._emit_pre()
        return self._doc.getvalue()
def html_to_text(html_string: str, maxcol: int = 72) -> str:
    """
    Convert HTML to plain text.

    Args:
        html_string: The HTML markup to convert.
        maxcol: Column at which flowing text is wrapped.

    Returns:
        The plain-text rendering produced by HtmlToTextParser.

    Example:
        text = html_to_text("<h1>Title</h1><p>Paragraph text.</p>")
    """
    parser = HtmlToTextParser(maxcol=maxcol)
    parser.feed(html_string)
    # With convert_charrefs=True the parser may hold back trailing text (for
    # example text ending in an unterminated "&..." reference) until it is
    # told the input is complete; close() forces that text through.
    parser.close()
    return parser.getvalue()
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
# Sample document for the html_to_text demo. Kept at module level so the
# content of the <pre> block stays free of source-code indentation.
_DEMO_HTML = """
<h1>Claude Code for formatter</h1>
<p>The <b>formatter</b> module provides an <em>abstract</em> framework for
building document formatters. It's useful for converting structured documents
to plain text output.</p>
<h2>Features</h2>
<ul>
<li>Abstract formatter and writer classes</li>
<li>DumbWriter for simple line-wrapped output</li>
<li>NullWriter and NullFormatter for testing</li>
</ul>
<pre>import formatter
f = formatter.DumbWriter()
</pre>
<hr/>
<p>See the <a href="https://docs.python.org/3/library/formatter.html">docs</a>.</p>
"""


def _run_demo() -> None:
    """Print sample output from DocumentFormatter, html_to_text and StringWriter."""
    print("=== formatter demo ===")
    if not _FORMATTER_AVAILABLE:
        # The formatter module was removed in Python 3.10.
        print(" formatter not available (Python 3.10+); using StringWriter directly")

    # ── DocumentFormatter ────────────────────────────────────────────────────
    print("\n--- DocumentFormatter ---")
    doc = DocumentFormatter(maxcol=60)
    doc.heading("Python Standard Library Guide", level=1)
    doc.paragraph(
        "Python ships with a comprehensive standard library covering "
        "everything from data structures to cryptography to audio "
        "processing. The formatter module is one of the oldest parts "
        "of this ecosystem."
    )
    doc.hr()
    doc.heading("Key Modules", level=2)
    doc.bullet_list(["os — operating system interface",
                     "re — regular expressions",
                     "json — JSON encoder and decoder",
                     "pathlib — object-oriented filesystem paths"])
    doc.numbered_list(["Install Python 3.12+",
                       "Read the documentation",
                       "Build something great"])
    doc.definition("DumbWriter", "A simple formatter.AbstractWriter that "
                   "outputs flowing text to a file, wrapping at maxcol.")
    output = doc.getvalue()
    for line in output.splitlines():
        print(f" {line}")

    # ── html_to_text ─────────────────────────────────────────────────────────
    print("\n--- html_to_text ---")
    text = html_to_text(_DEMO_HTML, maxcol=60)
    for line in text.splitlines():
        print(f" {line}")

    # ── StringWriter direct ──────────────────────────────────────────────────
    print("\n--- StringWriter direct ---")
    w = StringWriter(maxcol=50)
    w.new_margin(None, 0, 0)
    w.send_flowing_data("This is flowing text that should wrap at the ")
    w.send_flowing_data("fifty-column mark automatically without cutting ")
    w.send_flowing_data("any words in the middle of output.")
    w.send_line_break()
    w.send_literal_data(" Code block: x = 42\n")
    w.send_hor_rule()
    for line in w.getvalue().splitlines():
        print(f" {line!r}")
    print("\n=== done ===")


if __name__ == "__main__":
    _run_demo()
For the textwrap alternative — textwrap.wrap(text, width=72) and textwrap.fill(text, width=72, initial_indent=" ", subsequent_indent=" ") provide the same core word-wrapping operation that StringWriter.send_flowing_data() implements by hand (DocumentFormatter's list and definition helpers call textwrap.fill directly) — use textwrap for straightforward paragraph formatting; use a custom writer class like StringWriter above when you need a stateful formatter with indent stacks, font tracking, paragraph state, and the ability to mix literal and flowing text. For the html.parser alternative — html.parser.HTMLParser.feed(html) with method overrides for handle_starttag/handle_endtag/handle_data provides the same event-based HTML parsing that the original htmllib used formatter.AbstractFormatter to render — the HtmlToTextParser above shows the modern pattern: combine HTMLParser for parsing with DocumentFormatter for rendering, replacing the formatter-based tool chain that htmllib used. The Claude Skills 360 bundle includes formatter skill sets covering StringWriter with send_flowing_data()/send_literal_data()/send_paragraph()/send_hor_rule(), DocumentFormatter with heading()/paragraph()/literal()/bullet_list()/numbered_list()/definition(), and HtmlToTextParser with html_to_text() HTML→text converter. Start with the free tier to try document formatting patterns and formatter pipeline code generation.