Python’s xml.dom.minidom module implements the W3C Document Object Model (DOM) API for XML — parse a document into a tree of nodes and traverse or mutate it. from xml.dom import minidom. Parse: doc = minidom.parse("file.xml") or doc = minidom.parseString(b"<root/>") → Document. Build: impl = minidom.getDOMImplementation(); doc = impl.createDocument(None, "root", None). Create nodes: el = doc.createElement("item"); txt = doc.createTextNode("hello"); el.appendChild(txt). Attributes: el.getAttribute("id"), el.setAttribute("class", "active"). Find: doc.getElementsByTagName("item") → NodeList; doc.documentElement → root element. Walk: node.childNodes, node.parentNode, node.firstChild, node.nextSibling. Serialize: doc.toxml(encoding="utf-8"), doc.toprettyxml(indent=" "). Node types: ELEMENT_NODE=1, TEXT_NODE=3, COMMENT_NODE=8. Note: xml.dom.minidom does not protect against malicious XML; use defusedxml (PyPI) for untrusted input. Claude Code generates config file parsers, document transformers, SOAP message builders, namespace-aware processors, and XML diff tools.
CLAUDE.md for xml.dom.minidom
## xml.dom.minidom Stack
- Stdlib: from xml.dom import minidom
- Parse: doc = minidom.parse("f.xml") # from file
- doc = minidom.parseString(xml_bytes) # from bytes/str
- Root: root = doc.documentElement
- Find: doc.getElementsByTagName("item") # NodeList
- Attrs: el.getAttribute("id"); el.setAttribute("k","v")
- Build: impl = minidom.getDOMImplementation()
- doc = impl.createDocument(None,"root",None)
- Serial: doc.toprettyxml(indent=" ")
- Note: for untrusted XML, parse with defusedxml.minidom (PyPI) instead of xml.dom.minidom
## xml.dom.minidom DOM Pipeline
# app/domutil.py — parser, builder, walker, transformer, differ
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Callable, Iterator
from xml.dom import minidom
from xml.dom.minidom import Document, Element, Node
# ─────────────────────────────────────────────────────────────────────────────
# 1. Parse helpers
# ─────────────────────────────────────────────────────────────────────────────
def parse_file(path: "str | os.PathLike[str]") -> Document:
    """
    Parse an XML file and return its Document.

    ``path`` may be a filename string, an ``os.PathLike`` object such as
    ``pathlib.Path``, or an already-open binary file object.

    Raises whatever ``minidom.parse`` raises (``OSError`` for an unreadable
    file, ``xml.parsers.expat.ExpatError`` for malformed XML).

    Example:
        doc = parse_file("config.xml")
        root = doc.documentElement
    """
    import os  # local import keeps this helper self-contained

    # minidom.parse() only opens the file itself when handed a ``str``;
    # any other object is treated as a file object and ``.read()`` is
    # called on it.  Convert path-like objects so ``pathlib.Path`` works.
    if isinstance(path, os.PathLike):
        path = os.fspath(path)
    return minidom.parse(path)
def parse_string(xml: "str | bytes") -> Document:
    """
    Parse XML held in memory and return its Document.

    ``xml`` may be ``str`` or ``bytes``:

    * ``bytes`` are decoded by the parser according to the XML declaration
      (or UTF-8/UTF-16 auto-detection when there is none).
    * ``str`` is already decoded text, so it is handed to the parser as-is.
      The parser then ignores any ``encoding="..."`` pseudo-attribute in
      the declaration.  Encoding the string to UTF-8 bytes first would make
      a declaration such as ``encoding="ISO-8859-1"`` take effect and
      corrupt every non-ASCII character.

    Raises ``xml.parsers.expat.ExpatError`` if the input is not well-formed.

    Example:
        doc = parse_string("<catalog><book id='1'><title>Python</title></book></catalog>")
    """
    return minidom.parseString(xml)
def root(doc: Document) -> Element:
    """Fetch the top-level (document) element of ``doc``."""
    top_element = doc.documentElement
    return top_element
# ─────────────────────────────────────────────────────────────────────────────
# 2. Node navigation and query
# ─────────────────────────────────────────────────────────────────────────────
def child_elements(node: Node) -> list[Element]:
    """
    Collect the direct children of ``node`` that are elements.

    Text, comment and every other non-element child is ignored; document
    order is preserved.

    Example:
        for child in child_elements(root):
            print(child.tagName)
    """
    elements: list[Element] = []
    for candidate in node.childNodes:
        if candidate.nodeType != Node.ELEMENT_NODE:
            continue
        elements.append(candidate)
    return elements
def text_content(node: Node) -> str:
    """
    Return the concatenated text content of a node (like JS textContent).

    Character data from both plain text nodes and CDATA sections is
    collected, descending recursively into child elements.  Comments and
    processing instructions are ignored.  Leading and trailing whitespace is
    stripped from the result; because the function recurses through itself,
    that stripping is also applied to each nested element's text.

    Example:
        title = doc.getElementsByTagName("title")[0]
        print(text_content(title)) # "Python"
    """
    # CDATA sections carry character data exactly like text nodes do, but
    # have their own node type; checking TEXT_NODE alone would drop them.
    character_data = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
    parts: list[str] = []
    for child in node.childNodes:
        if child.nodeType in character_data:
            parts.append(child.data)
        elif child.nodeType == Node.ELEMENT_NODE:
            parts.append(text_content(child))
    return "".join(parts).strip()
def find_all(doc: "Document | Element", tag: str) -> list[Element]:
    """
    Collect every descendant element named ``tag`` into a plain list.

    The search covers the whole subtree below ``doc`` and keeps document
    order.

    Example:
        books = find_all(doc, "book")
    """
    matches = doc.getElementsByTagName(tag)
    return [element for element in matches]
def find_first(doc: "Document | Element", tag: str) -> "Element | None":
    """
    Look up the first descendant element named ``tag``.

    Returns None when the subtree contains no such element.

    Example:
        title = find_first(doc, "title")
        print(text_content(title) if title else "(missing)")
    """
    for match in doc.getElementsByTagName(tag):
        # The first hit in document order is the answer.
        return match
    return None
def attrs_dict(el: Element) -> dict[str, str]:
    """
    Return all attributes of an element as a plain dict.

    Keys are the attribute names as written in the document (including any
    namespace prefix); values are the attribute values.  Insertion order
    follows the element's attribute map.  An element without attributes
    yields an empty dict.

    Example:
        d = attrs_dict(book_element)
        print(d) # {"id": "1", "lang": "en"}
    """
    # NamedNodeMap.items() yields (name, value) pairs in a single pass.
    # Indexing with item(i) rebuilds the key list on every call in minidom,
    # which makes an index loop quadratic in the number of attributes.
    return dict(el.attributes.items())
def walk_elements(node: Node) -> Iterator[Element]:
    """
    Depth-first (pre-order) iterator over all element nodes in the subtree.

    ``node`` itself is yielded first when it is an element; a Document may
    be passed to walk the whole tree.  Traversal uses an explicit stack
    rather than recursion, so arbitrarily deep documents do not hit Python's
    recursion limit.  A node's children are snapshotted at the moment the
    node is visited.

    Example:
        for el in walk_elements(doc):
            print(el.tagName, attrs_dict(el))
    """
    pending: list[Node] = [node]
    while pending:
        current = pending.pop()
        if current.nodeType == Node.ELEMENT_NODE:
            yield current  # type: ignore[misc]
        # Push children in reverse so the first child is popped (visited) first.
        pending.extend(reversed(current.childNodes))
# ─────────────────────────────────────────────────────────────────────────────
# 3. Document builder
# ─────────────────────────────────────────────────────────────────────────────
class DomBuilder:
    """
    Fluent API for building XML documents programmatically.

    The builder keeps a cursor (the "current" element).  ``attr``, ``text``
    and ``comment`` act on the current element without moving the cursor;
    ``child`` appends a new element and moves the cursor into it; ``up``
    moves the cursor back to the parent.  Every method except ``build``
    returns the builder so calls can be chained.

    When ``ns_uri`` is given, the root element is created in that namespace
    and a matching ``xmlns`` (or ``xmlns:<prefix>`` for a prefixed root tag)
    declaration is written on it, so the namespace survives serialization.

    Example:
        doc = (DomBuilder("catalog")
               .attr("version", "1.0")
               .child("book", {"id": "1"})
               .text("title", "Python Cookbook")
               .text("author", "Beazley")
               .up()
               .child("book", {"id": "2"})
               .text("title", "Fluent Python")
               .build())
        print(doc.toprettyxml(indent=" "))
    """

    def __init__(self, root_tag: str, ns_uri: str = "") -> None:
        impl = minidom.getDOMImplementation()
        # An empty ns_uri means "no namespace", which the DOM spells as None.
        self._doc = impl.createDocument(ns_uri or None, root_tag, None)
        self._current: Element = self._doc.documentElement
        if ns_uri:
            # minidom does not emit namespace declarations on its own; without
            # an explicit xmlns attribute the namespace is lost in toxml().
            prefix, sep, _local = root_tag.partition(":")
            decl = f"xmlns:{prefix}" if sep else "xmlns"
            self._current.setAttribute(decl, ns_uri)
        # Path from the root to the current element; never becomes empty.
        self._stack: list[Element] = [self._current]

    def _new_element(self, tag: str, attrs: "dict[str, str] | None") -> Element:
        """Create a detached element with the given attributes applied."""
        el = self._doc.createElement(tag)
        if attrs:
            for k, v in attrs.items():
                el.setAttribute(k, v)
        return el

    def attr(self, name: str, value: str) -> "DomBuilder":
        """Set an attribute on the current element."""
        self._current.setAttribute(name, value)
        return self

    def child(self, tag: str, attrs: "dict[str, str] | None" = None) -> "DomBuilder":
        """Add a child element and descend into it."""
        el = self._new_element(tag, attrs)
        self._current.appendChild(el)
        self._stack.append(el)
        self._current = el
        return self

    def text(self, tag: str, content: str, attrs: "dict[str, str] | None" = None) -> "DomBuilder":
        """Add a child element containing a text node (cursor does not move)."""
        el = self._new_element(tag, attrs)
        el.appendChild(self._doc.createTextNode(content))
        self._current.appendChild(el)
        return self

    def comment(self, text: str) -> "DomBuilder":
        """Add a comment node to the current element."""
        self._current.appendChild(self._doc.createComment(text))
        return self

    def up(self) -> "DomBuilder":
        """Ascend to the parent element; a no-op when already at the root."""
        if len(self._stack) > 1:
            self._stack.pop()
            self._current = self._stack[-1]
        return self

    def build(self) -> Document:
        """Return the completed Document."""
        return self._doc
# ─────────────────────────────────────────────────────────────────────────────
# 4. Serialization helpers
# ─────────────────────────────────────────────────────────────────────────────
def to_xml_string(doc: Document, pretty: bool = False, indent: str = " ") -> str:
    """
    Render a Document as XML text.

    With pretty=True the output is indented using ``indent``; with
    pretty=False (the default) it is compact.  Either way the serializer's
    XML declaration is part of the result.

    Example:
        xml_str = to_xml_string(doc, pretty=True)
        print(xml_str)
    """
    serialized = (
        doc.toprettyxml(indent=indent, encoding=None)
        if pretty
        else doc.toxml(encoding=None)
    )
    # Defensive: decode if the serializer ever hands back bytes.
    if isinstance(serialized, str):
        return serialized
    return serialized.decode("utf-8")
def clean_pretty_xml(doc: Document, indent: str = " ") -> str:
    """
    Pretty-print ``doc`` and drop every whitespace-only line.

    toprettyxml can leave blank lines in its output; this filters them out
    and joins the remaining lines with a single newline each.

    Example:
        print(clean_pretty_xml(doc))
    """
    pretty = doc.toprettyxml(indent=indent)
    # str.strip returns "" (falsy) for whitespace-only lines.
    return "\n".join(filter(str.strip, pretty.split("\n")))
# ─────────────────────────────────────────────────────────────────────────────
# 5. Transform / diff helpers
# ─────────────────────────────────────────────────────────────────────────────
def element_to_dict(el: Element) -> dict[str, Any]:
    """
    Turn an Element subtree into nested plain dicts.

    Layout of the result:
      * '@'     -> dict of the element's attributes (only if it has any)
      * '#text' -> stripped text, only for elements without child elements
                   and only when that text is non-empty
      * <tag>   -> converted child element; a tag that occurs more than
                   once among the children maps to a list of conversions

    Example:
        d = element_to_dict(doc.documentElement)
        print(d)
    """
    converted: dict[str, Any] = {}

    attributes = attrs_dict(el)
    if attributes:
        converted["@"] = attributes

    nested = child_elements(el)
    if not nested:
        # Leaf element: record its text, if any, and stop.
        leaf_text = text_content(el)
        if leaf_text:
            converted["#text"] = leaf_text
        return converted

    for sub in nested:
        name = sub.tagName
        sub_dict = element_to_dict(sub)
        if name not in converted:
            converted[name] = sub_dict
            continue
        # Repeated tag: promote the existing entry to a list on first repeat.
        existing = converted[name]
        if isinstance(existing, list):
            existing.append(sub_dict)
        else:
            converted[name] = [existing, sub_dict]
    return converted
def diff_elements(a: Element, b: Element, path: str = "") -> list[str]:
    """
    Return a list of differences between two element trees.

    Each entry is a human-readable string of the form
    ``<path>...: <old> → <new>``.  Compared, in this order:

    1. the tag names of ``a`` and ``b`` themselves,
    2. attributes (reported in sorted attribute-name order so the output is
       stable from run to run),
    3. for leaf elements, the stripped text content,
    4. otherwise the ordered list of child tag names, followed by a
       recursive comparison of children that line up position-by-position
       with the same tag name.

    ``path`` is the label used as the prefix of every message; it defaults
    to ``a.tagName``.  Children are compared pairwise by position only, so
    an insertion shifts the pairs that follow it.

    Example:
        diffs = diff_elements(doc1.documentElement, doc2.documentElement)
        for d in diffs:
            print(d)
    """
    diffs: list[str] = []
    tag = path or a.tagName

    # Differently named elements are a difference in their own right;
    # without this check <a/> vs <b/> would compare as identical.
    if a.tagName != b.tagName:
        diffs.append(f"{tag}: tag {a.tagName!r} → {b.tagName!r}")

    # Compare attributes (sorted: plain set iteration order is not stable)
    aa = attrs_dict(a)
    ba = attrs_dict(b)
    for k in sorted(set(aa) | set(ba)):
        if aa.get(k) != ba.get(k):
            diffs.append(f"{tag}[@{k}]: {aa.get(k)!r} → {ba.get(k)!r}")

    # Compare text (leaf nodes)
    ac = child_elements(a)
    bc = child_elements(b)
    if not ac and not bc:
        ta, tb = text_content(a), text_content(b)
        if ta != tb:
            diffs.append(f"{tag}/text: {ta!r} → {tb!r}")
        return diffs

    # Compare child elements by tag name (simple ordered comparison)
    a_tags = [c.tagName for c in ac]
    b_tags = [c.tagName for c in bc]
    if a_tags != b_tags:
        diffs.append(f"{tag}/children: {a_tags} → {b_tags}")
    for ca, cb in zip(ac, bc):
        # Only descend into pairs that are the same kind of element.
        if ca.tagName == cb.tagName:
            diffs.extend(diff_elements(ca, cb, path=f"{tag}/{ca.tagName}"))
    return diffs
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
# Demo script: exercises every helper in this module against a small catalog
# document and prints the results.  Runs only when the file is executed directly.
if __name__ == "__main__":
    print("=== xml.dom.minidom demo ===")
    # ── parse_string + queries ─────────────────────────────────────────────────
    xml_src = """<?xml version="1.0"?>
<catalog>
<book id="1" lang="en">
<title>Python Cookbook</title>
<author>Beazley</author>
</book>
<book id="2" lang="en">
<title>Fluent Python</title>
<author>Ramalho</author>
</book>
</catalog>"""
    print("\n--- parse_string + find_all ---")
    doc = parse_string(xml_src)
    books = find_all(doc, "book")
    for book in books:
        bid = book.getAttribute("id")
        title = text_content(find_first(book, "title"))
        author = text_content(find_first(book, "author"))
        print(f" Book {bid}: {title!r} by {author!r}")
    # ── attrs_dict ────────────────────────────────────────────────────────────
    print("\n--- attrs_dict ---")
    first_book = books[0]
    print(f" attrs: {attrs_dict(first_book)}")
    # ── walk_elements ─────────────────────────────────────────────────────────
    print("\n--- walk_elements ---")
    for el in walk_elements(doc):
        # Depth = number of element ancestors; the Document node ends the climb.
        depth = 0
        node = el.parentNode
        while node and node.nodeType == Node.ELEMENT_NODE:
            depth += 1
            node = node.parentNode
        print(f" {' ' * depth}{el.tagName}")
    # ── DomBuilder ────────────────────────────────────────────────────────────
    print("\n--- DomBuilder ---")
    built = (DomBuilder("library")
             .attr("version", "2.0")
             .comment("Generated by DomBuilder")
             .child("shelf", {"name": "A"})
             .text("title", "Design Patterns", {"genre": "CS"})
             .text("title", "Clean Code", {"genre": "CS"})
             .up()
             .child("shelf", {"name": "B"})
             .text("title", "The Pragmatic Programmer")
             .build())
    print(clean_pretty_xml(built))
    # ── element_to_dict ────────────────────────────────────────────────────────
    print("\n--- element_to_dict ---")
    import json
    d = element_to_dict(doc.documentElement)
    print(json.dumps(d, indent=2)[:300])
    # ── diff_elements ──────────────────────────────────────────────────────────
    print("\n--- diff_elements ---")
    # The ampersand must be written as &amp; — a bare "&" in XML text is not
    # well-formed and makes the parser raise ExpatError.
    xml2 = xml_src.replace("Beazley", "Beazley &amp; Jones").replace('id="1"', 'id="10"')
    doc2 = parse_string(xml2)
    diffs = diff_elements(doc.documentElement, doc2.documentElement)
    for d_str in diffs:
        print(f" DIFF: {d_str}")
    print("\n=== done ===")
For the xml.etree.ElementTree alternative — ET.parse()/ET.fromstring() provide a simpler, lighter-weight tree API that is more Pythonic than the W3C DOM (element.text, element.attrib, element.findall("./item")) — prefer ElementTree for most new XML processing tasks; use xml.dom.minidom when you need full W3C DOM compliance, comment/CDATA node manipulation, toprettyxml serialization, or are interfacing with DOM-centric JavaScript tooling. For the lxml (PyPI) alternative — lxml.etree provides the fastest XML parsing in Python (wraps libxml2), XPath 1.0, XSLT 1.0, schema validation, and a superset of the ElementTree API — use lxml for production XML processing where performance, XPath queries, or schema validation matter; use xml.dom.minidom for lightweight zero-dependency DOM work. The Claude Skills 360 bundle includes xml.dom.minidom skill sets covering parse_file()/parse_string()/root() parsers, child_elements()/text_content()/find_all()/find_first()/attrs_dict()/walk_elements() navigation, DomBuilder fluent API with attr()/child()/text()/comment()/up()/build(), serializers to_xml_string()/clean_pretty_xml(), and element_to_dict()/diff_elements() transformers. Start with the free tier to try DOM XML patterns and xml.dom.minidom pipeline code generation.