Python’s xml.sax.saxutils module provides utility functions and base classes for SAX-based XML processing. Import it with from xml.sax import saxutils. Three key pieces: Escape/unescape: saxutils.escape(data, entities={}) — escapes &, <, > in text content; the entities dict adds extra replacements. saxutils.unescape(data, entities={}) — reverses the escape; entities adds custom entity mappings beside the defaults &amp;, &lt;, &gt;. saxutils.quoteattr(data, entities={}) — escapes text for use as an XML attribute value, wrapping it in " or ' automatically to minimise escaping. XMLGenerator: saxutils.XMLGenerator(out=None, encoding="iso-8859-1", short_empty_elements=False) — a ContentHandler that writes SAX events as XML to file out; call startElement(name, attrs), characters(content), endElement(name) to build well-formed XML. XMLFilterBase: saxutils.XMLFilterBase(parent=None) — a ContentHandler + XMLReader that sits between a parent reader and the registered handlers and by default passes every request and event through unchanged; subclass and override individual event methods to transform, filter, or instrument a SAX pipeline. Claude Code generates XML escaping utilities, SAX event loggers, element-stripping filters, attribute-rewriting filters, namespace-injecting filters, and XML transformation pipelines.
CLAUDE.md for xml.sax.saxutils
## xml.sax.saxutils Stack
- Stdlib: from xml.sax import saxutils, parse, parseString
- from xml.sax.handler import ContentHandler
- Escape: saxutils.escape(text) # & < >
- saxutils.escape(text, {"'": "&apos;", '"': "&quot;"})
- saxutils.unescape(text) # reverse
- saxutils.quoteattr(value) # for attributes
- Write: gen = saxutils.XMLGenerator(out, encoding="utf-8", short_empty_elements=True)
- gen.startDocument()
- gen.startElement("tag", attrs_impl)
- gen.characters("text")
- gen.endElement("tag")
- Filter: class MyFilter(saxutils.XMLFilterBase):
- def startElement(self, name, attrs): ...
- super().startElement(name, attrs)
xml.sax.saxutils XML Processing Pipeline
# app/xmlsaxutilsutil.py — escape, generate, filter, transform, log, strip
from __future__ import annotations
import io
import xml.sax
import xml.sax.handler
from xml.sax import saxutils
from xml.sax.xmlreader import AttributesImpl
from typing import Any
# ─────────────────────────────────────────────────────────────────────────────
# 1. Escape / unescape helpers
# ─────────────────────────────────────────────────────────────────────────────
def xml_escape(text: str,
               full: bool = False) -> str:
    """
    Escape text for use as XML element content.

    ``&``, ``<`` and ``>`` are always replaced by their entity references.
    With ``full=True`` single and double quotes are replaced as well, which
    makes the result safe inside a quoted attribute value.

    Example:
        xml_escape("1 < 2 & 3 > 4")
        # "1 &lt; 2 &amp; 3 &gt; 4"
        xml_escape('<b>"hello"</b>', full=True)
        # "&lt;b&gt;&quot;hello&quot;&lt;/b&gt;"
    """
    # saxutils.escape() replaces "&" before it applies the extra mappings, so
    # the "&" inside "&apos;" / "&quot;" is not escaped a second time.
    extras: dict[str, str] = {"'": "&apos;", '"': "&quot;"} if full else {}
    return saxutils.escape(text, extras)
def xml_unescape(text: str,
                 extras: "dict[str, str] | None" = None) -> str:
    """
    Replace XML entity references in text content with the characters they
    stand for.

    ``&amp;``, ``&lt;`` and ``&gt;`` are always handled.
    extras: additional { "&entity;": "char" } mappings; None means no extras.

    Example:
        xml_unescape("cats &amp; dogs &lt; 42")             # "cats & dogs < 42"
        xml_unescape("&quot;hi&quot;", {"&quot;": '"'})     # '"hi"'
    """
    return saxutils.unescape(text, extras or {})
def xml_attr(value: str) -> str:
    """
    Return value escaped and wrapped in quotes, ready to be written as an
    XML attribute value.

    The quote character is picked by saxutils.quoteattr(): double quotes by
    default, single quotes when the value contains only double quotes, and
    double quotes with ``"`` replaced by ``&quot;`` when it contains both.

    Example:
        xml_attr('He said "hello"')   # 'He said "hello"'  (single-quoted)
        xml_attr("it's alive")        # "it's alive"       (double-quoted)
    """
    return saxutils.quoteattr(value)
# ─────────────────────────────────────────────────────────────────────────────
# 2. XMLGenerator-based document builder
# ─────────────────────────────────────────────────────────────────────────────
class SimpleXMLWriter:
    """
    Small convenience wrapper around saxutils.XMLGenerator.

    The XML declaration is written as soon as the writer is constructed.
    The writer can be used as a context manager; leaving the ``with`` block
    calls close(), which ends the document.

    Example:
        out = io.StringIO()
        with SimpleXMLWriter(out) as w:
            with w.element("catalog", {"version": "1"}):
                with w.element("book", {"id": "1"}):
                    w.text_element("title", "Python Cookbook")
                    w.text_element("price", "39.99")
        print(out.getvalue())
    """

    def __init__(self, out: "io.IOBase | io.StringIO | io.BytesIO",
                 encoding: str = "utf-8",
                 short_empty: bool = True) -> None:
        """
        out: text or binary stream the XML is written to.
        encoding: encoding announced in the XML declaration.
        short_empty: emit ``<tag/>`` instead of ``<tag></tag>`` for elements
            that have no content.
        """
        self._gen = saxutils.XMLGenerator(out, encoding=encoding,
                                          short_empty_elements=short_empty)
        self._closed = False
        self._gen.startDocument()

    def __enter__(self) -> "SimpleXMLWriter":
        return self

    def __exit__(self, *_: Any) -> None:
        # Returns None, so an exception raised inside the with-block propagates.
        self.close()

    def start(self, tag: str,
              attrs: "dict[str, str] | None" = None) -> None:
        """Open an element; attrs=None means no attributes."""
        impl = AttributesImpl(attrs or {})
        self._gen.startElement(tag, impl)

    def end(self, tag: str) -> None:
        """Close the element named tag."""
        self._gen.endElement(tag)

    def characters(self, text: str) -> None:
        """Write text content; XMLGenerator escapes it."""
        self._gen.characters(text)

    def text_element(self, tag: str, text: str,
                     attrs: "dict[str, str] | None" = None) -> None:
        """Convenience: open tag, write text, close tag."""
        self.start(tag, attrs)
        self.characters(text)
        self.end(tag)

    def empty(self, tag: str,
              attrs: "dict[str, str] | None" = None) -> None:
        """Write an element with no content (self-closing when short_empty)."""
        self.start(tag, attrs)
        self.end(tag)

    def element(self, tag: str,
                attrs: "dict[str, str] | None" = None) -> "_ElementContext":
        """Context manager for a nested element."""
        return _ElementContext(self, tag, attrs)

    def close(self) -> None:
        """End the document. Safe to call more than once."""
        if self._closed:
            return
        self._closed = True
        self._gen.endDocument()


class _ElementContext:
    """Opens an element on ``__enter__`` and closes it on ``__exit__``."""

    def __init__(self, writer: SimpleXMLWriter,
                 tag: str, attrs: "dict[str, str] | None") -> None:
        self._w = writer
        self._tag = tag
        self._attrs = attrs

    def __enter__(self) -> "_ElementContext":
        self._w.start(self._tag, self._attrs)
        return self

    def __exit__(self, *_: Any) -> None:
        # The end tag is written even when the body raised; the exception
        # still propagates because None is returned.
        self._w.end(self._tag)
def build_xml(root_tag: str,
              children: "list[tuple[str, dict, str]]",
              root_attrs: "dict[str, str] | None" = None) -> str:
    """
    Build a flat XML document: one root element holding text-only children.

    root_tag: name of the root element.
    children: list of (tag, attrs_dict, text_content), written in order.
    root_attrs: attributes of the root element, or None for none.

    Returns the document as a string, XML declaration included.

    Example:
        xml = build_xml("catalog", [
            ("book", {"id": "1"}, "Python Cookbook"),
            ("book", {"id": "2"}, "Learning Python"),
        ], {"version": "1"})
    """
    out = io.StringIO()
    # The writer is created and closed explicitly so this function does not
    # depend on SimpleXMLWriter supporting the context-manager protocol.
    writer = SimpleXMLWriter(out)
    with writer.element(root_tag, root_attrs):
        for tag, attrs, text in children:
            writer.text_element(tag, text, attrs)
    writer.close()
    return out.getvalue()
# ─────────────────────────────────────────────────────────────────────────────
# 3. XMLFilterBase: element-stripping filter
# ─────────────────────────────────────────────────────────────────────────────
class StripElementsFilter(saxutils.XMLFilterBase):
    """
    SAX filter that removes elements with the given tag names, together with
    everything nested inside them. All other events are forwarded to the
    downstream handler.

    Only the non-namespace events (startElement / endElement) are filtered.

    Example:
        out = io.StringIO()
        gen = saxutils.XMLGenerator(out, "utf-8")
        filt = StripElementsFilter(gen, {"price", "secret"})
        xml.sax.parseString(xml_bytes, filt)
        print(out.getvalue())
    """

    def __init__(self, downstream: xml.sax.handler.ContentHandler,
                 strip_tags: "set[str]") -> None:
        """
        downstream: handler that receives the events that survive filtering.
        strip_tags: element names to drop.
        """
        super().__init__()
        self._down = downstream
        self._strip = strip_tags
        self._depth = 0  # nesting depth inside a stripped element

    def startDocument(self) -> None:
        self._down.startDocument()

    def endDocument(self) -> None:
        self._down.endDocument()

    def startElement(self, name: str, attrs: Any) -> None:
        # Once inside a stripped element every nested start tag is counted,
        # whatever its name, so the matching end tags can be paired off.
        if self._depth > 0 or name in self._strip:
            self._depth += 1
        else:
            self._down.startElement(name, attrs)

    def endElement(self, name: str) -> None:
        if self._depth > 0:
            self._depth -= 1
        else:
            self._down.endElement(name)

    def characters(self, content: str) -> None:
        if self._depth == 0:
            self._down.characters(content)

    def ignorableWhitespace(self, whitespace: str) -> None:
        if self._depth == 0:
            self._down.ignorableWhitespace(whitespace)

    def processingInstruction(self, target: str, data: str) -> None:
        # Without this override the inherited method hands the event to the
        # base class's default no-op handler and it is lost.
        if self._depth == 0:
            self._down.processingInstruction(target, data)
# ─────────────────────────────────────────────────────────────────────────────
# 4. XMLFilterBase: attribute-rewriting filter
# ─────────────────────────────────────────────────────────────────────────────
class RenameAttributeFilter(saxutils.XMLFilterBase):
    """
    SAX filter that renames attribute keys on every element and forwards all
    other events unchanged.

    rename: { "old_attr": "new_attr" }

    If an element carries both the old and the new name, the attribute that
    appears later wins, because the renamed attributes are collected in a dict.

    Example:
        out = io.StringIO()
        gen = saxutils.XMLGenerator(out, "utf-8")
        filt = RenameAttributeFilter(gen, {"class": "css_class"})
        xml.sax.parseString(xml_bytes, filt)
    """

    def __init__(self, downstream: xml.sax.handler.ContentHandler,
                 rename: "dict[str, str]") -> None:
        """
        downstream: handler that receives the rewritten events.
        rename: mapping from existing attribute name to replacement name.
        """
        super().__init__()
        self._down = downstream
        self._rename = rename

    def startDocument(self) -> None:
        self._down.startDocument()

    def endDocument(self) -> None:
        self._down.endDocument()

    def startElement(self, name: str, attrs: Any) -> None:
        # Attributes without an entry in the rename map keep their name.
        new_attrs = {self._rename.get(k, k): v
                     for k, v in attrs.items()}
        self._down.startElement(name, AttributesImpl(new_attrs))

    def endElement(self, name: str) -> None:
        self._down.endElement(name)

    def characters(self, content: str) -> None:
        self._down.characters(content)

    def ignorableWhitespace(self, whitespace: str) -> None:
        # Forwarded explicitly: the inherited method would hand the event to
        # the base class's default no-op handler instead of downstream.
        self._down.ignorableWhitespace(whitespace)

    def processingInstruction(self, target: str, data: str) -> None:
        # Same reason as ignorableWhitespace.
        self._down.processingInstruction(target, data)
def apply_filter(xml_source: "bytes | str",
                 strip_tags: "set[str] | None" = None,
                 rename_attrs: "dict[str, str] | None" = None) -> str:
    """
    Apply strip and/or attribute-rename filters to XML.

    xml_source: the document as bytes, or as str (encoded to UTF-8 first).
    strip_tags: element names to remove with their content; None or empty
        skips this filter.
    rename_attrs: { "old_attr": "new_attr" }; None or empty skips this filter.

    Returns the filtered XML string, XML declaration included.

    NOTE: this parses with the stdlib xml.sax parser; only pass trusted input.

    Example:
        result = apply_filter(xml_bytes, strip_tags={"price", "notes"})
    """
    if isinstance(xml_source, str):
        xml_source = xml_source.encode("utf-8")

    out = io.StringIO()
    gen = saxutils.XMLGenerator(out, "utf-8", short_empty_elements=True)

    # The chain is built from the output end backwards, so at parse time the
    # events flow: parser -> strip filter -> rename filter -> generator.
    handler: xml.sax.handler.ContentHandler = gen
    if rename_attrs:
        handler = RenameAttributeFilter(handler, rename_attrs)  # type: ignore[assignment]
    if strip_tags:
        handler = StripElementsFilter(handler, strip_tags)  # type: ignore[assignment]

    xml.sax.parseString(xml_source, handler)  # type: ignore[arg-type]
    return out.getvalue()
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Manual smoke test: exercises every helper in this module and prints
    # the results.
    print("=== xml.sax.saxutils demo ===")

    # ── escape / unescape ─────────────────────────────────────────────────
    print("\n--- escape / unescape ---")
    raw_texts = [
        "1 < 2 & 3 > 4",
        'He said "hello" & it\'s fine',
        "<script>alert('xss')</script>",
    ]
    for t in raw_texts:
        esc = xml_escape(t)
        unesc = xml_unescape(esc)
        print(f" orig : {t!r}")
        print(f" escaped: {esc!r}")
        print(f" round : {unesc == t}")

    # ── quoteattr ─────────────────────────────────────────────────────────
    print("\n--- xml_attr ---")
    for val in ['simple', "it's got apostrophe", 'has "quotes"', "both ' and \"quotes\"'"]:
        print(f" {val!r:35s} → {xml_attr(val)}")

    # ── SimpleXMLWriter ───────────────────────────────────────────────────
    print("\n--- SimpleXMLWriter ---")
    xml_out = build_xml(
        "catalog", [
            ("book", {"id": "b1", "lang": "en"}, "Python Cookbook"),
            ("book", {"id": "b2", "lang": "fr"}, "Apprendre Python"),
            ("magazine", {"id": "m1"}, "Python Weekly"),
        ],
        {"version": "1.0"}
    )
    print(xml_out[:300])

    # ── apply_filter (strip + rename) ─────────────────────────────────────
    print("\n--- apply_filter ---")
    source_xml = (
        b'<?xml version="1.0"?>'
        b'<catalog>'
        b'<book id="b1" class="fiction"><title>Python</title><price>39.99</price></book>'
        b'<book id="b2" class="tech"><title>Django</title><price>29.99</price></book>'
        b'</catalog>'
    )
    filtered = apply_filter(source_xml,
                            strip_tags={"price"},
                            rename_attrs={"class": "category"})
    print(filtered[:400])

    print("\n=== done ===")
For the html stdlib companion — html.escape(s) escapes only the 5 critical HTML/XML characters (&, <, >, ", ') and is the right choice for HTML output; saxutils.escape() by default handles only &, <, > (without escaping quotes) which is correct for XML element text content but not for attribute values — always use saxutils.quoteattr() for attribute values and saxutils.escape() only for text nodes. For the defusedxml (PyPI) alternative — defusedxml.sax.parseString(data, handler) is a drop-in replacement for xml.sax.parseString that mitigates XML bomb and XXE attacks by rejecting billion-laughs expansions and external entity references — use defusedxml whenever parsing untrusted XML; use stdlib xml.sax/saxutils only for trusted input, and always explicitly disable DTD features. The Claude Skills 360 bundle includes xml.sax.saxutils skill sets covering xml_escape()/xml_unescape()/xml_attr() escape helpers, SimpleXMLWriter/build_xml() XMLGenerator-based document builder, StripElementsFilter element-removal filter, RenameAttributeFilter attribute-rewriting filter, and apply_filter() composable filter pipeline. Start with the free tier to try SAX utility patterns and xml.sax.saxutils pipeline code generation.