Python’s xml.etree.ElementTree module parses and generates XML using a lightweight tree API. import xml.etree.ElementTree as ET. Parse file: tree = ET.parse("file.xml") → ElementTree; root = tree.getroot(). Parse string: root = ET.fromstring(xml_bytes_or_str). Element attributes: el.tag (str), el.text (str|None, text before first child), el.tail (str|None, text after close tag), el.attrib (dict). Find: el.find("tag") → first matching child or None; el.findall("path") → list; el.findtext("path", default="") → text; el.iter("tag") → el itself (when it matches) plus all matching descendants, in document order. XPath subset: el.findall("./channel/item"), el.findall(".//{ns}name"), el.findall(".//item[@type='feed']"); namespace maps: {'ns': 'http://...'} passed as second arg to find/findall. Create: el = ET.Element("root", attrib={"id": "1"}) — child = ET.SubElement(parent, "child") creates and appends in one call; extra keyword arguments become XML attributes, so set the text afterwards with child.text = "…". Serialize: ET.tostring(el, encoding="unicode", xml_declaration=False). Pretty-print: ET.indent(el) (Python 3.9+; modifies the tree in place). Write to file: tree.write("out.xml", encoding="utf-8", xml_declaration=True). Incremental: ET.iterparse(file, events=("start","end")) for large files. Claude Code generates config readers, RSS parsers, SOAP request builders, sitemap generators, and XML diff tools.
CLAUDE.md for xml.etree.ElementTree
## xml.etree.ElementTree Stack
- Stdlib: import xml.etree.ElementTree as ET
- Parse: root = ET.fromstring(xml_str)
- tree = ET.parse("file.xml"); root = tree.getroot()
- Find: el.find("tag") el.findall("./channel/item")
- el.findtext("title", default="")
- Create: root = ET.Element("root")
- child = ET.SubElement(root, "item", id="1"); child.text = "hello"
- Dump: ET.indent(root); print(ET.tostring(root, encoding="unicode"))
- Write: tree.write("out.xml", encoding="utf-8", xml_declaration=True)
xml.etree.ElementTree XML Pipeline
# app/xml_etree_util.py — parse, create, RSS, SOAP, sitemap, diff
from __future__ import annotations
import io
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterator
# ─────────────────────────────────────────────────────────────────────────────
# 1. Parse helpers
# ─────────────────────────────────────────────────────────────────────────────
def load_xml(source: "str | Path | bytes") -> ET.Element:
    """
    Load XML from a file path, bytes, or string and return the root Element.

    Dispatch rules:
      * ``Path``  -> always read as a file.
      * ``str``   -> parsed as XML text when its first non-blank character is
                     ``<``; otherwise treated as a file path.
      * anything else (e.g. ``bytes``) -> handed to ``ET.fromstring``.

    Raises ``ET.ParseError`` for malformed XML and ``OSError`` when a path
    cannot be opened.

    NOTE: the stdlib parser is not hardened against malicious documents;
    prefer ``defusedxml`` for untrusted input.

    Example:
        root = load_xml("/etc/app/config.xml")
        root = load_xml(b"<root><item>1</item></root>")
        root = load_xml("<root><item>1</item></root>")
    """
    if isinstance(source, Path):
        return ET.parse(str(source)).getroot()
    if isinstance(source, str):
        # A document always begins with "<" (declaration, comment or root
        # tag); a BOM or stray leading whitespace is dropped so the XML
        # declaration, if any, sits at the very start as the parser requires.
        text = source.lstrip("\ufeff \t\r\n")
        if text.startswith("<"):
            return ET.fromstring(text)
        return ET.parse(source).getroot()
    return ET.fromstring(source)
def find_text(el: ET.Element, path: str, default: str = "") -> str:
    """
    Look up the first element matching ``path`` and return its text.

    ``default`` is returned both when nothing matches and when the matched
    element has no (or empty) text.

    Example:
        title = find_text(channel, "title")
    """
    text = el.findtext(path)
    if text:
        return text
    return default
def iter_elements(root: ET.Element, tag: str) -> Iterator[ET.Element]:
    """
    Return an iterator over every element named ``tag`` in the tree at ``root``.

    The walk is delegated to ``Element.iter``, so ``root`` itself is included
    when its tag matches.

    Example:
        for item in iter_elements(root, "item"):
            print(find_text(item, "title"))
    """
    matches = root.iter(tag)
    return matches
def attrib(el: ET.Element, name: str, default: str = "") -> str:
    """
    Read attribute ``name`` from ``el``, falling back to ``default`` if absent.

    Example:
        src = attrib(img_el, "src")
    """
    attributes = el.attrib
    return attributes.get(name, default)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Element builder helpers
# ─────────────────────────────────────────────────────────────────────────────
def new_element(tag: str, text: "str | None" = None, **attrs: str) -> ET.Element:
    """
    Build a detached Element named ``tag``.

    Keyword arguments become XML attributes; ``text`` (when not None) becomes
    the element's text content.

    Example:
        el = new_element("title", "My Page")
        img = new_element("img", src="logo.png", alt="Logo")
    """
    node = ET.Element(tag, attrs)
    if text is None:
        return node
    node.text = text
    return node
def append_child(
    parent: ET.Element,
    tag: str,
    text: "str | None" = None,
    **attrs: str,
) -> ET.Element:
    """
    Add a new ``tag`` element as the last child of ``parent`` and return it.

    Keyword arguments become XML attributes on the new element; ``text``
    (when not None) becomes its text content.

    Example:
        item = append_child(channel, "item")
        append_child(item, "title", "Hello World")
        append_child(item, "link", "https://example.com/1")
    """
    node = ET.SubElement(parent, tag, attrs)
    if text is None:
        return node
    node.text = text
    return node
def to_string(root: ET.Element, indent: bool = True, xml_decl: bool = False) -> str:
    """
    Serialize an Element to an XML string, pretty-printed by default.

    Args:
        root: element to serialize. It is never modified: indentation is
            applied to a private deep copy, because ``ET.indent`` rewrites
            ``text``/``tail`` whitespace in place.
        indent: when True, indent nested elements; when False, emit the tree
            exactly as it is.
        xml_decl: when True, prefix the output with an XML declaration line.

    Example:
        print(to_string(root))
    """
    import copy  # local import: only needed on the indenting path

    if indent:
        root = copy.deepcopy(root)
        ET.indent(root)
    body = ET.tostring(root, encoding="unicode")
    if xml_decl:
        return '<?xml version="1.0" encoding="utf-8"?>\n' + body
    return body
# ─────────────────────────────────────────────────────────────────────────────
# 3. RSS feed parser
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class RssItem:
    """One ``<item>`` entry of an RSS feed; every field defaults to ""."""

    # Text of the item's <title>, <link> and <description> children.
    title: str = ""
    link: str = ""
    description: str = ""
    # Text of <pubDate>, kept as the raw string (no date parsing is done).
    pub_date: str = ""
    # Text of <guid>.
    guid: str = ""
@dataclass
class RssFeed:
    """Channel-level data of an RSS feed plus its list of items."""

    # Text of the channel's <title>, <link> and <description> children.
    title: str = ""
    link: str = ""
    description: str = ""
    # default_factory gives every feed its own list instead of a shared one.
    items: list[RssItem] = field(default_factory=list)
def parse_rss(xml_source: "str | bytes | Path") -> RssFeed:
    """
    Parse an RSS 2.0 feed into an RssFeed dataclass.

    ``xml_source`` is loaded with ``load_xml``. The feed fields are read from
    the root's ``<channel>`` child, or from the root itself when there is no
    ``<channel>``. Missing or empty child elements become "".

    Example:
        feed = parse_rss(urllib.request.urlopen(url).read())
        for item in feed.items[:5]:
            print(item.title, item.link)
    """
    root = load_xml(xml_source)

    # Compare against None explicitly: an Element with no children is falsy,
    # so ``find(...) or root`` would wrongly discard an empty <channel> (and
    # truth-testing an Element is deprecated).
    channel = root.find("channel")
    if channel is None:
        channel = root

    items = [
        RssItem(
            title=find_text(item_el, "title"),
            link=find_text(item_el, "link"),
            description=find_text(item_el, "description"),
            pub_date=find_text(item_el, "pubDate"),
            guid=find_text(item_el, "guid"),
        )
        for item_el in channel.findall("item")
    ]
    return RssFeed(
        title=find_text(channel, "title"),
        link=find_text(channel, "link"),
        description=find_text(channel, "description"),
        items=items,
    )
def build_rss(feed: RssFeed) -> str:
    """
    Render ``feed`` as an indented RSS 2.0 document with an XML declaration.

    The channel always gets <title>, <link> and <description>; each item
    always gets <title> and <link>, while <description>, <pubDate> and <guid>
    are written only when the corresponding field is non-empty.

    Example:
        feed = RssFeed(title="My Blog", link="https://example.com")
        feed.items.append(RssItem(title="Post 1", link="/post/1"))
        print(build_rss(feed))
    """
    rss_root = ET.Element("rss", {"version": "2.0"})
    channel = append_child(rss_root, "channel")

    channel_fields = (
        ("title", feed.title),
        ("link", feed.link),
        ("description", feed.description),
    )
    for tag, value in channel_fields:
        append_child(channel, tag, value)

    for entry in feed.items:
        item_el = append_child(channel, "item")
        append_child(item_el, "title", entry.title)
        append_child(item_el, "link", entry.link)
        optional_fields = (
            ("description", entry.description),
            ("pubDate", entry.pub_date),
            ("guid", entry.guid),
        )
        for tag, value in optional_fields:
            if value:
                append_child(item_el, tag, value)

    return to_string(rss_root, xml_decl=True)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Namespace-aware utilities
# ─────────────────────────────────────────────────────────────────────────────
def strip_ns(tag: str) -> str:
    """
    Drop a leading ``{namespace}`` qualifier from a tag name.

    Everything up to and including the first ``}`` is removed; a tag without
    ``}`` is returned unchanged.

    Example:
        tag = "{http://www.w3.org/2005/Atom}entry"
        print(strip_ns(tag))  # "entry"
    """
    _, brace, local_name = tag.partition("}")
    if brace:
        return local_name
    return tag
def ns_map_from_root(root: ET.Element) -> dict[str, str]:
    """
    Return a new, empty prefix→URI namespace map for the caller to fill in.

    ``root`` is not inspected: parsed elements do not carry the prefixes used
    in the source document, so the caller supplies the prefix/URI pairs.

    Example:
        root = ET.fromstring(atom_xml)
        ns = ns_map_from_root(root)
        ns["atom"] = "http://www.w3.org/2005/Atom"
        for entry in root.findall("atom:entry", ns):
            print(entry.findtext("atom:title", "", ns))
    """
    # Nothing can be recovered from the element itself; hand back a blank map.
    empty_map: dict[str, str] = {}
    return empty_map
def flatten_xml(el: ET.Element, prefix: str = "") -> dict[str, str]:
    """
    Collapse an element tree into a ``"dotted.path" -> text`` mapping.

    Each path joins namespace-stripped tag names with ".", starting from
    ``prefix`` when one is given. Only elements whose text is non-blank
    contribute an entry (the text is stored stripped). When several elements
    produce the same path, the last one wins.

    Example:
        data = flatten_xml(ET.fromstring("<cfg><db><host>localhost</host></db></cfg>"))
        # {"cfg.db.host": "localhost"}
    """
    local_name = strip_ns(el.tag)
    dotted = local_name if not prefix else f"{prefix}.{local_name}"

    flat: dict[str, str] = {}
    stripped = el.text.strip() if el.text else ""
    if stripped:
        flat[dotted] = stripped

    # Recurse into children, using this element's path as their prefix.
    for sub in el:
        for key, value in flatten_xml(sub, dotted).items():
            flat[key] = value
    return flat
# ─────────────────────────────────────────────────────────────────────────────
# 5. Sitemap generator
# ─────────────────────────────────────────────────────────────────────────────
_SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9"


def build_sitemap(urls: list[dict]) -> str:
    """
    Build an XML sitemap from a list of URL dicts.

    Each dict: {"loc": ..., "lastmod": ... (optional),
                "changefreq": ... (optional), "priority": ... (optional)}

    Values are converted with ``str()``, so a float priority or a ``date``
    lastmod is accepted. Optional keys that are missing or None are omitted.
    Children are written in the order loc, lastmod, changefreq, priority.

    The sitemap namespace is emitted as the document's default namespace via
    ``default_namespace=``; the process-wide ElementTree prefix registry is
    left untouched.

    Raises:
        KeyError: if a dict has no "loc" entry.

    Example:
        xml = build_sitemap([
            {"loc": "https://example.com/", "priority": "1.0"},
            {"loc": "https://example.com/about", "lastmod": "2028-12-01"},
        ])
    """
    qualifier = f"{{{_SITEMAP_NS}}}"
    urlset = ET.Element(f"{qualifier}urlset")
    for u in urls:
        url_el = ET.SubElement(urlset, f"{qualifier}url")
        ET.SubElement(url_el, f"{qualifier}loc").text = str(u["loc"])
        for key in ("lastmod", "changefreq", "priority"):
            value = u.get(key)
            if value is not None:
                # ElementTree can only serialize str text, so coerce here.
                ET.SubElement(url_el, f"{qualifier}{key}").text = str(value)
    ET.indent(urlset)
    body = ET.tostring(urlset, encoding="unicode", default_namespace=_SITEMAP_NS)
    return '<?xml version="1.0" encoding="UTF-8"?>\n' + body
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test walkthrough of the helpers above; everything goes to stdout.
    print("=== xml.etree.ElementTree demo ===")

    # 1. Parse an in-memory document and list each <book>.
    print("\n--- load_xml + find_text ---")
    library_doc = b"""<library>
<book id="1"><title>Python Cookbook</title><author>Beazley</author></book>
<book id="2"><title>Fluent Python</title><author>Ramalho</author></book>
</library>"""
    library = load_xml(library_doc)
    for book_el in library.iter("book"):
        bid = attrib(book_el, "id")
        title = find_text(book_el, "title")
        author = find_text(book_el, "author")
        print(f" id={bid} title={title!r} author={author!r}")

    # 2. Build a small tree and pretty-print it.
    print("\n--- build element tree ---")
    note_el = new_element("note")
    note_fields = (
        ("to", "Alice"),
        ("from", "Bob"),
        ("body", "Don't forget the xml.etree demo!"),
    )
    for field_tag, field_text in note_fields:
        append_child(note_el, field_tag, field_text)
    print(to_string(note_el))

    # 3. Flatten a nested config document into dotted keys.
    print("\n--- flatten_xml ---")
    config_doc = b"<config><database><host>localhost</host><port>5432</port></database></config>"
    for k, v in flatten_xml(load_xml(config_doc)).items():
        print(f" {k} = {v!r}")

    # 4. Serialize a feed, then parse it back.
    print("\n--- RSS round-trip ---")
    demo_feed = RssFeed(
        title="Demo Blog",
        link="https://example.com",
        description="Test feed",
        items=[
            RssItem(
                title="Hello",
                link="https://example.com/1",
                pub_date="Mon, 02 Dec 2028 00:00:00 GMT",
            ),
            RssItem(title="World", link="https://example.com/2"),
        ],
    )
    parsed = parse_rss(build_rss(demo_feed).encode())
    print(f" feed.title: {parsed.title!r}")
    for item in parsed.items:
        print(f" item: {item.title!r} → {item.link!r}")

    # 5. Generate a sitemap and show the first 200 characters.
    print("\n--- build_sitemap ---")
    sitemap_pages = [
        {"loc": "https://example.com/", "priority": "1.0"},
        {"loc": "https://example.com/about", "lastmod": "2028-12-01"},
    ]
    sitemap_xml = build_sitemap(sitemap_pages)
    print(sitemap_xml[:200])

    print("\n=== done ===")
For the lxml (PyPI) alternative — lxml.etree offers full XPath 1.0, XSLT, RelaxNG/XML Schema validation, and faster parsing than the stdlib — use lxml when you need full XPath (predicates, axes, functions), schema validation, or performance on very large documents; use xml.etree.ElementTree for zero-dependency XML parsing where the limited XPath subset is sufficient. Note: xml.etree.ElementTree is vulnerable to “billion laughs” and “quadratic blowup” XML attacks on untrusted input — use defusedxml (PyPI) as a safe drop-in replacement when parsing user-supplied XML. For the xmltodict (PyPI) alternative — xmltodict.parse(xml_bytes) converts XML to nested Python dicts and lists in one call — use xmltodict for rapid prototyping when you want to treat XML like JSON without writing a parser subclass; use xml.etree.ElementTree when you need precise control over namespace handling, partial streaming (iterparse), or when you must generate XML output as well as parse it. The Claude Skills 360 bundle includes xml.etree.ElementTree skill sets covering load_xml()/find_text()/iter_elements()/attrib() parse helpers, new_element()/append_child()/to_string() builder utilities, RssFeed/RssItem with parse_rss()/build_rss() RSS round-trip, strip_ns()/flatten_xml() namespace and flatten helpers, and build_sitemap() sitemap XML generator. Start with the free tier to try XML parsing patterns and xml.etree.ElementTree pipeline code generation.