Python’s email.headerregistry module provides typed header objects that the modern email.policy (Python 3.6+) uses when parsing messages. from email.headerregistry import Address, Group, HeaderRegistry. Key classes: Address(display_name="", username="", domain="", addr_spec=None) — represents a single RFC 5322 email address; properties: .display_name, .username, .domain, .addr_spec (user@domain); str(addr) gives the fully-formatted address. Group(display_name="", addresses=()) — RFC 5322 group syntax. HeaderRegistry — the callable mapping used by email.policy.default to create the right header class from a name-value pair; can be extended. Typed header attributes (when parsed with policy.default): msg["From"].addresses → tuple of Address; msg["To"].addresses; msg["Date"].datetime → datetime; msg["Content-Type"].content_type → "text/plain"; msg["Content-Type"].params → {"charset": "utf-8"}. Build an Address directly: Address("Alice Smith", addr_spec="alice@example.com"). Build a header string: email.utils.formataddr(("Alice", "alice@example.com")). Claude Code generates RFC-compliant address parsers, typed header inspectors, display-name extractors, domain-based address filters, and structured message header validators.
CLAUDE.md for email.headerregistry
## email.headerregistry Stack
- Stdlib: from email.headerregistry import Address, Group, HeaderRegistry
- from email import policy
- from email.parser import BytesParser
- Address: a = Address("Alice", addr_spec="alice@example.com")
- str(a) # "Alice <alice@example.com>"
- a.display_name # "Alice"
- a.addr_spec # "alice@example.com"
- a.username # "alice"
- a.domain # "example.com"
- Group: g = Group("Team", [a1, a2])
- str(g) # "Team: Alice <alice@example.com>, Bob <bob@example.com>;"
- Parsed: msg = BytesParser(policy=policy.default).parsebytes(raw)
- msg["From"].addresses # tuple[Address, ...]
- msg["Date"].datetime # datetime
- msg["Content-Type"].content_type # "text/plain"
email.headerregistry Typed Header Pipeline
# app/emailheaderregistryutil.py — parse, inspect, filter, build, validate
from __future__ import annotations
import re
from dataclasses import dataclass, field
from datetime import datetime
from email import policy as _policy
from email.headerregistry import Address, Group, HeaderRegistry
from email.message import EmailMessage
from email.parser import BytesParser, Parser
from typing import Any
# ─────────────────────────────────────────────────────────────────────────────
# 1. Address construction and formatting
# ─────────────────────────────────────────────────────────────────────────────
def make_address(display_name: str = "",
                 addr_spec: str = "") -> Address:
    """
    Create an Address from a display name and an addr_spec.

    Args:
        display_name: Human-readable name shown before the address; may be
            empty.
        addr_spec: The "user@domain" part. May be empty (the default), in
            which case the returned Address has an empty username and domain.

    Returns:
        The constructed Address.

    Raises:
        Whatever Address() raises for a non-empty addr_spec it cannot parse.

    Example:
        a = make_address("Alice Smith", "alice@example.com")
        print(str(a))   # "Alice Smith <alice@example.com>"
    """
    # Address() parses any addr_spec that is not None and rejects the empty
    # string, so the documented default ("") must be mapped to None or a plain
    # make_address("Alice") call would raise.
    return Address(display_name=display_name, addr_spec=addr_spec or None)
def parse_address(text: str) -> Address | None:
    """
    Parse a single RFC 5322 address string into an Address object.

    The text is handed to the default policy's header factory as the value of
    a "From" header, and the first address it yields is returned. The text is
    parsed as a str, so non-ASCII display names come back unchanged.

    Args:
        text: Address text such as "Bob Jones <bob@example.com>" or
            "bob@example.com". Embedded CR/LF (header folding) is removed
            before parsing.

    Returns:
        The first parsed Address, or None when the text yields no address or
        parsing raises. Parsing is lenient: this function does not check that
        the returned Address has a domain.

    Example:
        a = parse_address("Bob Jones <bob@example.com>")
        print(a.domain)     # "example.com"
    """
    try:
        # The header factory expects an already-unfolded value, so strip the
        # line breaks of a folded value here.
        unfolded = "".join(re.split(r"\r\n?|\n", text))
        header = _policy.default.header_factory("From", unfolded)
        addrs = header.addresses
    except Exception:
        # Deliberate best effort: any parser failure (or a non-str argument)
        # is reported as "no address" rather than propagated.
        return None
    return addrs[0] if addrs else None
def format_address_list(addresses: "list[Address]") -> str:
    """
    Render Address objects as one comma-separated header value.

    Each address is converted with str() and the results are joined with
    ", ". An empty input gives an empty string.

    Example:
        addrs = [make_address("Alice", "alice@example.com"),
                 make_address("Bob", "bob@example.com")]
        print(format_address_list(addrs))
        # "Alice <alice@example.com>, Bob <bob@example.com>"
    """
    rendered = [str(entry) for entry in addresses]
    return ", ".join(rendered)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Typed header extraction from parsed messages
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class MessageHeaders:
    """Typed snapshot of the standard headers of one parsed message."""

    # Subject text ("" when the header is absent).
    subject: str
    # Addresses taken from the From header.
    from_addr: list[Address]
    # Addresses taken from the To header.
    to_addrs: list[Address]
    # Addresses taken from the Cc header.
    cc_addrs: list[Address]
    # Addresses taken from the Bcc header.
    bcc_addrs: list[Address]
    # Parsed Date header, or None when unavailable.
    date: datetime | None
    # Message-ID value ("" when the header is absent).
    message_id: str
    # Media type such as "text/plain" ("" when there is no Content-Type).
    content_type: str
    # Charset parameter of Content-Type.
    charset: str
    # Addresses taken from the Reply-To header.
    reply_to: list[Address]
    # In-Reply-To value ("" when the header is absent).
    in_reply_to: str
def extract_headers(raw: "bytes | str",
                    pol: Any = _policy.default) -> MessageHeaders:
    """
    Parse a raw message and collect its standard headers as typed fields.

    Bytes input is parsed with BytesParser, anything else with Parser, both
    using the policy given in ``pol``. A header that is absent, or that does
    not expose the typed attribute being read (``addresses``, ``datetime``),
    yields an empty list / None. When the Content-Type header lacks the typed
    attributes, its string form is used as the content type and the charset
    stays "utf-8".

    Example:
        with open("msg.eml", "rb") as f:
            hdrs = extract_headers(f.read())
        print(hdrs.from_addr, hdrs.date)
    """
    parsed: EmailMessage
    if isinstance(raw, bytes):
        parsed = BytesParser(policy=pol).parsebytes(raw)  # type: ignore
    else:
        parsed = Parser(policy=pol).parsestr(raw)  # type: ignore

    def address_list(header_name: str) -> list[Address]:
        # Addresses of one header; [] when missing or not an address header.
        header = parsed.get(header_name)
        if header is not None:
            try:
                return list(header.addresses)
            except AttributeError:
                pass
        return []

    def parsed_date() -> datetime | None:
        # datetime of the Date header; None when missing or untyped.
        header = parsed.get("date")
        if header is not None:
            try:
                return header.datetime
            except AttributeError:
                pass
        return None

    content_type, charset = "", "utf-8"
    ct_header = parsed.get("content-type")
    if ct_header is not None:
        try:
            content_type = ct_header.content_type
            charset = ct_header.params.get("charset", "utf-8")
        except AttributeError:
            # Untyped header: fall back to its raw string form.
            content_type = str(ct_header)

    subject = parsed.get("subject", "")
    senders = address_list("from")
    recipients = address_list("to")
    copied = address_list("cc")
    blind_copied = address_list("bcc")
    sent_at = parsed_date()
    message_id = parsed.get("message-id", "")
    reply_targets = address_list("reply-to")
    parent_id = parsed.get("in-reply-to", "")

    return MessageHeaders(
        subject=subject,
        from_addr=senders,
        to_addrs=recipients,
        cc_addrs=copied,
        bcc_addrs=blind_copied,
        date=sent_at,
        message_id=message_id,
        content_type=content_type,
        charset=charset,
        reply_to=reply_targets,
        in_reply_to=parent_id,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 3. Address-based filters
# ─────────────────────────────────────────────────────────────────────────────
def filter_by_domain(addresses: "list[Address]",
                     domain: str) -> list[Address]:
    """
    Keep only the addresses whose domain equals ``domain``.

    Both sides are lower-cased before comparison, so the match is
    case-insensitive. Input order is preserved.

    Example:
        addrs = extract_all_addresses(raw)
        internal = filter_by_domain(addrs, "mycompany.com")
    """
    wanted = domain.lower()
    matches: list[Address] = []
    for candidate in addresses:
        if candidate.domain.lower() == wanted:
            matches.append(candidate)
    return matches
def extract_all_addresses(raw: "bytes | str") -> list[Address]:
    """
    Collect the unique addresses found in the From/To/Cc/Bcc headers.

    Addresses are de-duplicated on their lower-cased addr_spec; the first
    occurrence (scanning From, then To, Cc, Bcc) is the one kept, and the
    result preserves that first-seen order.

    Example:
        addrs = extract_all_addresses(raw_bytes)
        for a in addrs:
            print(a.addr_spec)
    """
    headers = extract_headers(raw)
    unique: dict[str, Address] = {}
    everyone = (headers.from_addr + headers.to_addrs
                + headers.cc_addrs + headers.bcc_addrs)
    for address in everyone:
        # setdefault keeps the first Address seen for a given addr_spec.
        unique.setdefault(address.addr_spec.lower(), address)
    return list(unique.values())
# ─────────────────────────────────────────────────────────────────────────────
# 4. Custom HeaderRegistry extension
# ─────────────────────────────────────────────────────────────────────────────
def custom_registry_policy(extra_headers: "dict[str, Any] | None" = None) -> Any:
    """
    Return a clone of email.policy.default that uses a fresh HeaderRegistry.

    Every (name, class) pair in ``extra_headers`` whose class is not None is
    registered on that registry with map_to_type(); pairs whose value is None
    are skipped. With no argument, or an empty mapping, nothing extra is
    registered.

    Example:
        pol = custom_registry_policy({"x-priority": None})
        msg = BytesParser(policy=pol).parsebytes(raw)
    """
    registry = HeaderRegistry()
    for header_name, header_cls in (extra_headers or {}).items():
        if header_cls is None:
            continue
        registry.map_to_type(header_name, header_cls)
    return _policy.default.clone(header_factory=registry)
# ─────────────────────────────────────────────────────────────────────────────
# 5. Header validation
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ValidationResult:
    """Outcome of a header validation run: a verdict plus the problems found."""

    # Overall verdict supplied by whoever builds the result.
    valid: bool
    # Human-readable problem descriptions; each instance gets its own list.
    issues: list[str] = field(default_factory=list)
def validate_message_headers(raw: "bytes | str") -> ValidationResult:
    """
    Check a raw message for required headers and plausible addresses.

    Reported problems, in order: empty From, no To/Cc recipients, empty
    Subject, empty Message-ID, then per From/To/Cc address a domain that is
    empty or has no dot, and an empty local part. If header extraction raises,
    a single "parse error" issue is returned instead.

    Example:
        result = validate_message_headers(raw_email_bytes)
        if not result.valid:
            for issue in result.issues:
                print(issue)
    """
    try:
        headers = extract_headers(raw)
    except Exception as exc:
        return ValidationResult(valid=False, issues=[f"parse error: {exc}"])

    # (value that must be truthy, message reported when it is not)
    required_checks = (
        (headers.from_addr, "Missing or empty From header"),
        (headers.to_addrs or headers.cc_addrs, "No recipients in To or Cc"),
        (headers.subject, "Missing Subject header"),
        (headers.message_id, "Missing Message-ID header"),
    )
    problems: list[str] = [
        message for present, message in required_checks if not present
    ]

    for addr in [*headers.from_addr, *headers.to_addrs, *headers.cc_addrs]:
        domain = addr.domain
        if not (domain and "." in domain):
            problems.append(f"Suspicious address (no valid domain): {addr.addr_spec!r}")
        if not addr.username:
            problems.append(f"Suspicious address (no local part): {addr.addr_spec!r}")

    return ValidationResult(valid=not problems, issues=problems)
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
# Demo driver: exercises each helper above against a small in-memory message.
# The e-mail addresses are illustrative; the previous literals had been
# replaced by an obfuscation placeholder that Address() cannot parse.
if __name__ == "__main__":
    print("=== email.headerregistry demo ===")
    sample = (
        b"From: Alice Smith <alice@example.com>\r\n"
        b"To: Bob Jones <bob@example.com>, carol@example.org\r\n"
        b"Cc: dave@example.com\r\n"
        b"Subject: Typed header demo\r\n"
        b"Date: Mon, 01 Feb 2029 12:00:00 +0000\r\n"
        b"Message-ID: <demo-001@example.com>\r\n"
        b"Content-Type: text/plain; charset=utf-8\r\n"
        b"\r\n"
        b"Hello!\r\n"
    )
    # ── Address construction ────────────────────────────────────────────────
    print("\n--- Address ---")
    a = make_address("Dave", "dave@example.com")
    print(f" str : {str(a)!r}")
    print(f" display : {a.display_name!r}")
    print(f" addr_spec : {a.addr_spec!r}")
    print(f" username : {a.username!r}")
    print(f" domain : {a.domain!r}")
    # ── Group ───────────────────────────────────────────────────────────────
    print("\n--- Group ---")
    a1 = make_address("Alice", "alice@example.com")
    a2 = make_address("Bob", "bob@example.com")
    g = Group("Engineering", [a1, a2])
    print(f" str: {str(g)!r}")
    # ── extract_headers ────────────────────────────────────────────────────
    print("\n--- extract_headers ---")
    hdrs = extract_headers(sample)
    print(f" subject : {hdrs.subject!r}")
    print(f" from : {[str(a) for a in hdrs.from_addr]}")
    print(f" to : {[str(a) for a in hdrs.to_addrs]}")
    print(f" date : {hdrs.date}")
    print(f" content_type: {hdrs.content_type!r}")
    print(f" charset : {hdrs.charset!r}")
    # ── filter_by_domain ───────────────────────────────────────────────────
    print("\n--- filter_by_domain ---")
    all_addrs = extract_all_addresses(sample)
    example_addrs = filter_by_domain(all_addrs, "example.com")
    print(f" all: {[a.addr_spec for a in all_addrs]}")
    print(f" @example.com: {[a.addr_spec for a in example_addrs]}")
    # ── validate_message_headers ───────────────────────────────────────────
    print("\n--- validate_message_headers ---")
    good = validate_message_headers(sample)
    # Deliberately malformed: domain-less sender, empty To, no Subject/Message-ID.
    bad_raw = b"From: badaddr\r\nTo:\r\n\r\nbody"
    bad = validate_message_headers(bad_raw)
    print(f" good.valid: {good.valid} issues: {good.issues}")
    print(f" bad.valid : {bad.valid} issues: {bad.issues}")
    # ── parse_address ──────────────────────────────────────────────────────
    print("\n--- parse_address ---")
    for text in ["Eve <eve@example.com>", "frank@example.org", "bogus"]:
        parsed = parse_address(text)
        if parsed:
            print(f" {text!r:35s} → {parsed.addr_spec!r}")
        else:
            print(f" {text!r:35s} → None")
    print("\n=== done ===")
For the email.utils stdlib companion — email.utils.parseaddr("Alice <alice@example.com>") → ("Alice", "alice@example.com") and email.utils.getaddresses(["Alice <alice@example.com>, Bob <bob@example.com>"]) → [("Alice", "alice@example.com"), ("Bob", "bob@example.com")] provide quick stdlib address tuple parsing without the full email.headerregistry.Address object — use email.utils for simple display-name/addr-spec extraction in scripts; use email.headerregistry.Address with policy.default when you need structured objects, Group support, or full RFC 5322 structural validation. For the flanker (PyPI) alternative — flanker.addresslib.address.parse("Alice <alice@example.com>") provides production-grade RFC 5321/5322 address parsing with deliverability checks, role-address detection, and mailbox normalization — use flanker in ESPs and inbox-validation services that process untrusted user input at scale; use email.headerregistry in standard Python tools that trust the input envelope. The Claude Skills 360 bundle includes email.headerregistry skill sets covering make_address()/parse_address()/format_address_list() address builders, MessageHeaders/extract_headers() typed header extractor, filter_by_domain()/extract_all_addresses() address filters, custom_registry_policy() registry extension, and ValidationResult/validate_message_headers() header validator. Start with the free tier to try typed header patterns and email.headerregistry pipeline code generation.