Python’s mailbox module provides classes to read and write email storage formats: Maildir, mbox, Babyl, MH, and MMDF. import mailbox. Maildir: mb = mailbox.Maildir("~/Mail") — Maildir++ directory tree; messages accessed by opaque key strings. mbox: mb = mailbox.mbox("archive.mbox") — Unix mbox single file; keys are sequential integers. add: key = mb.add(msg) — accepts a Message, email.message.Message, str, or bytes; returns the new key. remove / __delitem__: mb.remove(key) or del mb[key]. get / __getitem__: mb[key] → mailbox.Message subclass. iterkeys: mb.iterkeys() → iterator of keys; list(mb.keys()). items: mb.items() → (key, message) pairs. values: mb.values() → messages. flush: mb.flush() — write buffered changes. lock / unlock: mb.lock(); mb.unlock() — exclusive file locking. close: mb.close() — flush + unlock + close. Message subclasses: MaildirMessage (has get_flags(), set_flags(), get_subdir()), mboxMessage (has get_from(), set_flags()). All messages expose the email.message.Message API: msg["Subject"], msg.get_payload(), msg.is_multipart(), msg.walk(). Claude Code generates mailbox batch processors, mail migrators, archive searchers, and message exporters.
CLAUDE.md for mailbox
## mailbox Stack
- Stdlib: import mailbox, email
- Maildir: mb = mailbox.Maildir("~/Maildir")
- mbox: mb = mailbox.mbox("archive.mbox")
- Add: key = mb.add(email_message_obj)
- Get: msg = mb[key]
- Delete: mb.remove(key); mb.flush()
- Iterate: for key, msg in mb.items(): ...
- Lock: mb.lock(); ...; mb.unlock()
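- Context: with contextlib.closing(mailbox.mbox("file")) as mb: ... (the stdlib Mailbox classes do not implement __enter__/__exit__ themselves, so wrap them in closing() or call mb.close() in a finally block)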
mailbox Email Archive Pipeline
# app/mailboxutil.py — open, iterate, search, export, convert, stats
from __future__ import annotations
import email
import email.policy
import mailbox
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from pathlib import Path
from typing import Any, Iterator
# ─────────────────────────────────────────────────────────────────────────────
# 1. Open helpers
# ─────────────────────────────────────────────────────────────────────────────
def open_mbox(path: str | Path, create: bool = False) -> mailbox.mbox:
"""
Open an mbox file. create=True makes the file if it doesn't exist.
Example:
with open_mbox("archive.mbox") as mb:
for key, msg in mb.items():
print(msg["Subject"])
"""
return mailbox.mbox(str(path), create=create)
def open_maildir(path: str | Path, create: bool = False) -> mailbox.Maildir:
"""
Open a Maildir directory. create=True creates the subdirectory layout.
Example:
with open_maildir("~/Maildir") as mb:
print(len(mb))
"""
return mailbox.Maildir(str(Path(path).expanduser()), create=create)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Message parsing helpers
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class MessageSummary:
key: str
subject: str
sender: str
date: datetime | None
size: int # raw bytes
has_attachments: bool
def __str__(self) -> str:
date_str = self.date.strftime("%Y-%m-%d %H:%M") if self.date else "?"
attach = " [+attach]" if self.has_attachments else ""
return (f"[{self.key}] {date_str} "
f"From: {self.sender[:30]} "
f"Subject: {self.subject[:50]}{attach}")
def _parse_date(msg: mailbox.Message) -> datetime | None:
raw = msg.get("Date", "")
if not raw:
return None
try:
return parsedate_to_datetime(raw)
except Exception:
return None
def _has_attachments(msg: mailbox.Message) -> bool:
    """Return True if any part of a multipart message is an attachment.

    Only the disposition value itself is inspected (case-insensitively), so
    an ``inline`` part whose filename merely contains the word "attachment"
    is not counted.  Non-multipart messages always yield False.
    """
    if not msg.is_multipart():
        return False
    return any(
        part.get_content_disposition() == "attachment" for part in msg.walk()
    )
def summarize_message(key: str, msg: mailbox.Message) -> MessageSummary:
    """Build a MessageSummary from a mailbox message.

    Args:
        key: the mailbox key of the message; stored as ``str(key)``.
        msg: the message to summarise.

    Returns:
        A MessageSummary whose ``size`` is the length of ``msg.as_bytes()``.
        Missing Subject/From headers become "(no subject)" / "(unknown)".
    """
    # Header lookups can hand back an email.header.Header object (e.g. for
    # headers containing raw non-ASCII bytes); coerce so the summary's str
    # fields can be sliced and regex-searched safely.
    subject = str(msg.get("Subject", "(no subject)"))
    sender = str(msg.get("From", "(unknown)"))
    return MessageSummary(
        key=str(key),
        subject=subject,
        sender=sender,
        date=_parse_date(msg),
        size=len(msg.as_bytes()),
        has_attachments=_has_attachments(msg),
    )
def get_text_body(msg: mailbox.Message) -> str:
"""
Extract the plain-text body from a message (first text/plain part).
Example:
body = get_text_body(mb["key"])
"""
if msg.is_multipart():
for part in msg.walk():
if part.get_content_type() == "text/plain":
payload = part.get_payload(decode=True)
charset = part.get_content_charset() or "utf-8"
if isinstance(payload, bytes):
return payload.decode(charset, errors="replace")
else:
payload = msg.get_payload(decode=True)
charset = msg.get_content_charset() or "utf-8"
if isinstance(payload, bytes):
return payload.decode(charset, errors="replace")
return ""
# ─────────────────────────────────────────────────────────────────────────────
# 3. Iterators and search
# ─────────────────────────────────────────────────────────────────────────────
def iter_summaries(
    mb: mailbox.Mailbox,
    limit: int | None = None,
) -> Iterator[MessageSummary]:
    """
    Yield a MessageSummary for each message in the mailbox.

    Messages are read lazily via ``iteritems()`` so that a small ``limit``
    does not force the whole mailbox to be parsed first.

    Args:
        mb: an open mailbox.
        limit: maximum number of summaries to yield; None means no limit.
            Zero or a negative value yields nothing.

    Yields:
        One MessageSummary per message that could be summarised.  Messages
        whose summary raises are skipped (best effort) and do not count
        toward ``limit``.

    Example:
        mb = open_mbox("archive.mbox")
        try:
            for summary in iter_summaries(mb, limit=10):
                print(summary)
        finally:
            mb.close()
    """
    if limit is not None and limit <= 0:
        return
    produced = 0
    for key, msg in mb.iteritems():
        try:
            summary = summarize_message(key, msg)
        except Exception:
            # Best effort: one malformed message must not abort the listing.
            continue
        # Yield outside the try so errors raised by the consumer are not
        # swallowed by the handler above.
        yield summary
        produced += 1
        if limit is not None and produced >= limit:
            return
def _assume_utc(dt: datetime | None) -> datetime | None:
    """Return dt as timezone-aware, treating a naive value as UTC."""
    if dt is not None and dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt


def search_messages(
    mb: mailbox.Mailbox,
    subject_pattern: str | None = None,
    sender_pattern: str | None = None,
    body_pattern: str | None = None,
    after: datetime | None = None,
    before: datetime | None = None,
    limit: int = 100,
) -> list[MessageSummary]:
    """
    Search a mailbox by subject, sender, body regex, or date range.

    Args:
        mb: an open mailbox.
        subject_pattern: case-insensitive regex matched against the Subject.
        sender_pattern: case-insensitive regex matched against the From header.
        body_pattern: case-insensitive regex matched against the plain-text body.
        after: drop messages dated earlier than this.
        before: drop messages dated later than this.
        limit: maximum number of results returned.

    Returns:
        Matching MessageSummary objects in mailbox order.  Messages without a
        parseable date are never excluded by the date filters, and messages
        that cannot be summarised are skipped.

    Naive datetimes (in ``after``/``before`` or in a message) are treated as
    UTC so they can be compared with timezone-aware ones.

    Example:
        results = search_messages(mb, subject_pattern="invoice", limit=20)
        for r in results:
            print(r)
    """
    sub_re = re.compile(subject_pattern, re.I) if subject_pattern else None
    from_re = re.compile(sender_pattern, re.I) if sender_pattern else None
    body_re = re.compile(body_pattern, re.I) if body_pattern else None
    lower = _assume_utc(after)
    upper = _assume_utc(before)

    results: list[MessageSummary] = []
    if limit <= 0:
        return results

    # iteritems() parses messages one at a time, so the search can stop as
    # soon as `limit` matches have been collected.
    for key, msg in mb.iteritems():
        try:
            summary = summarize_message(key, msg)
        except Exception:
            continue
        # str() guards against header values that are not plain strings.
        if sub_re and not sub_re.search(str(summary.subject)):
            continue
        if from_re and not from_re.search(str(summary.sender)):
            continue
        when = _assume_utc(summary.date)
        if when is not None:
            if lower is not None and when < lower:
                continue
            if upper is not None and when > upper:
                continue
        if body_re and not body_re.search(get_text_body(msg)):
            continue
        results.append(summary)
        if len(results) >= limit:
            break
    return results
# ─────────────────────────────────────────────────────────────────────────────
# 4. Export and conversion
# ─────────────────────────────────────────────────────────────────────────────
def export_to_eml(
mb: mailbox.Mailbox,
output_dir: str | Path,
keys: list[str] | None = None,
) -> int:
"""
Export messages to individual .eml files in output_dir.
keys: list of keys to export; None = all.
Returns count of exported files.
Example:
with open_mbox("archive.mbox") as mb:
n = export_to_eml(mb, "eml_export/")
print(f"Exported {n} messages")
"""
out = Path(output_dir)
out.mkdir(parents=True, exist_ok=True)
count = 0
target_keys = set(str(k) for k in keys) if keys else None
for key, msg in mb.items():
if target_keys and str(key) not in target_keys:
continue
fname = out / f"{key}.eml"
fname.write_bytes(msg.as_bytes())
count += 1
return count
def convert_mbox_to_maildir(
    mbox_path: str | Path,
    maildir_path: str | Path,
    create: bool = True,
) -> int:
    """
    Convert an mbox file to a Maildir directory.

    Args:
        mbox_path: existing mbox file to read.
        maildir_path: destination Maildir.
        create: create the Maildir layout if it does not exist.

    Returns:
        Number of messages converted.

    Both mailboxes are closed explicitly with try/finally instead of a
    ``with`` statement, so the function does not rely on the mailbox objects
    being context managers.

    Example:
        n = convert_mbox_to_maildir("archive.mbox", "~/Maildir/Archive")
        print(f"Converted {n} messages")
    """
    count = 0
    src = open_mbox(mbox_path)
    try:
        dst = open_maildir(maildir_path, create=create)
        try:
            dst.lock()
            try:
                # itervalues() streams messages instead of loading them all.
                for msg in src.itervalues():
                    dst.add(mailbox.MaildirMessage(msg))
                    count += 1
                dst.flush()
            finally:
                dst.unlock()
        finally:
            dst.close()
    finally:
        src.close()
    return count
def convert_maildir_to_mbox(
    maildir_path: str | Path,
    mbox_path: str | Path,
    create: bool = True,
) -> int:
    """
    Convert a Maildir directory to an mbox file.

    Args:
        maildir_path: existing Maildir to read.
        mbox_path: destination mbox file; messages are appended to it.
        create: create the mbox file if it does not exist.

    Returns:
        Number of messages converted.

    Both mailboxes are closed explicitly with try/finally instead of a
    ``with`` statement, so the function does not rely on the mailbox objects
    being context managers.

    Example:
        n = convert_maildir_to_mbox("~/Maildir/Archive", "archive.mbox")
    """
    count = 0
    src = open_maildir(maildir_path)
    try:
        dst = open_mbox(mbox_path, create=create)
        try:
            dst.lock()
            try:
                # itervalues() streams messages instead of loading them all.
                for msg in src.itervalues():
                    dst.add(mailbox.mboxMessage(msg))
                    count += 1
                dst.flush()
            finally:
                dst.unlock()
        finally:
            dst.close()
    finally:
        src.close()
    return count
# ─────────────────────────────────────────────────────────────────────────────
# 5. Mailbox statistics
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class MailboxStats:
message_count: int
total_bytes: int
senders: dict[str, int] # {sender: count}
date_range: tuple[datetime | None, datetime | None]
has_attachments: int
def __str__(self) -> str:
earliest = self.date_range[0].strftime("%Y-%m-%d") if self.date_range[0] else "?"
latest = self.date_range[1].strftime("%Y-%m-%d") if self.date_range[1] else "?"
top = sorted(self.senders.items(), key=lambda t: -t[1])[:3]
top_str = ", ".join(f"{s}({c})" for s, c in top)
return (
f"messages: {self.message_count}\n"
f"total_bytes: {self.total_bytes:,d}\n"
f"date_range: {earliest} → {latest}\n"
f"attachments: {self.has_attachments}\n"
f"top senders: {top_str}"
)
def compute_stats(mb: mailbox.Mailbox) -> MailboxStats:
    """
    Compute statistics for an open mailbox.

    Args:
        mb: an open mailbox.

    Returns:
        A MailboxStats with the message count (``len(mb)``), the summed
        ``as_bytes()`` sizes, a per-sender message count, the earliest and
        latest parseable Date headers, and the number of messages that have
        attachments.  Naive dates are treated as UTC for comparison.

    Example:
        mb = open_mbox("archive.mbox")
        try:
            print(compute_stats(mb))
        finally:
            mb.close()
    """
    senders: dict[str, int] = {}
    total_bytes = 0
    earliest: datetime | None = None
    latest: datetime | None = None
    attachments = 0
    # itervalues() streams messages rather than loading the whole mailbox.
    for msg in mb.itervalues():
        total_bytes += len(msg.as_bytes())
        # A header lookup may return an email.header.Header object, which is
        # unhashable; coerce to str before using it as a dict key.
        sender = str(msg.get("From", "(unknown)"))
        senders[sender] = senders.get(sender, 0) + 1
        dt = _parse_date(msg)
        if dt is not None:
            if dt.tzinfo is None:
                # Naive and aware datetimes cannot be compared directly.
                dt = dt.replace(tzinfo=timezone.utc)
            if earliest is None or dt < earliest:
                earliest = dt
            if latest is None or dt > latest:
                latest = dt
        if _has_attachments(msg):
            attachments += 1
    return MailboxStats(
        message_count=len(mb),
        total_bytes=total_bytes,
        senders=senders,
        date_range=(earliest, latest),
        has_attachments=attachments,
    )
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-contained demo: builds a small mbox in a temp directory and runs
    # every helper in this module against it.
    import tempfile
    from contextlib import closing
    from email.mime.text import MIMEText

    print("=== mailbox demo ===")

    def _make_msg(subject: str, sender: str, body: str, date_str: str) -> mailbox.mboxMessage:
        """Build a UTF-8 text/plain mboxMessage with the given headers."""
        msg = MIMEText(body, "plain", "utf-8")
        msg["Subject"] = subject
        msg["From"] = sender
        msg["To"] = "[email protected]"
        msg["Date"] = date_str
        return mailbox.mboxMessage(msg)

    # NOTE(review): the "[email protected]" literals look like e-mail
    # obfuscation placeholders — substitute real example addresses if so.
    with tempfile.TemporaryDirectory() as tmpdir:
        mbox_path = os.path.join(tmpdir, "test.mbox")
        maildir_path = os.path.join(tmpdir, "test_maildir")

        # closing() is used throughout because the stdlib mailbox classes are
        # not context managers themselves; it calls close() on exit.

        # ── create + populate mbox ──
        print("\n--- create mbox and add messages ---")
        with closing(open_mbox(mbox_path, create=True)) as mb:
            mb.lock()
            try:
                msgs = [
                    _make_msg("Invoice #1001", "[email protected]",
                              "Please find attached your invoice.", "Mon, 1 Jan 2024 10:00:00 +0000"),
                    _make_msg("Meeting tomorrow", "[email protected]",
                              "Don't forget our team meeting.", "Tue, 2 Jan 2024 09:00:00 +0000"),
                    _make_msg("Invoice #1002", "[email protected]",
                              "Second invoice for Q4 services.", "Wed, 3 Jan 2024 11:00:00 +0000"),
                    _make_msg("Lunch?", "[email protected]",
                              "Are you free for lunch on Friday?", "Thu, 4 Jan 2024 12:00:00 +0000"),
                ]
                for m in msgs:
                    mb.add(m)
                mb.flush()
            finally:
                # Release the lock even if adding a message fails.
                mb.unlock()
        print(f" created mbox with {len(msgs)} messages")

        # ── iter_summaries ──
        print("\n--- iter_summaries ---")
        with closing(open_mbox(mbox_path)) as mb:
            for s in iter_summaries(mb):
                print(f" {s}")

        # ── search_messages ──
        print("\n--- search_messages (subject='invoice') ---")
        with closing(open_mbox(mbox_path)) as mb:
            results = search_messages(mb, subject_pattern="invoice")
            for r in results:
                print(f" {r}")

        # ── compute_stats ──
        print("\n--- compute_stats ---")
        with closing(open_mbox(mbox_path)) as mb:
            stats = compute_stats(mb)
            print(stats)

        # ── convert mbox → Maildir ──
        print("\n--- convert_mbox_to_maildir ---")
        n = convert_mbox_to_maildir(mbox_path, maildir_path, create=True)
        print(f" converted {n} messages to Maildir")

        # ── read back from Maildir ──
        print("\n--- Maildir read-back ---")
        with closing(open_maildir(maildir_path)) as mb:
            print(f" Maildir message count: {len(mb)}")
            for s in iter_summaries(mb, limit=2):
                print(f" {s}")

        # ── export to eml ──
        print("\n--- export_to_eml ---")
        eml_dir = os.path.join(tmpdir, "eml")
        with closing(open_mbox(mbox_path)) as mb:
            count = export_to_eml(mb, eml_dir)
        eml_files = list(Path(eml_dir).glob("*.eml"))
        print(f" exported {count} messages → {len(eml_files)} .eml files")

    print("\n=== done ===")
For the email / email.parser alternative — email.message_from_bytes(), email.message_from_string(), and email.parser.BytesParser parse individual RFC 2822 message bytes into email.message.Message objects with full MIME support — use email directly when you have raw message bytes or strings and need header access, MIME part iteration, or content decoding for a single message; use mailbox when you need to read, write, delete, or iterate over a collection of messages stored in a mailbox format (mbox, Maildir, MH) and want the format’s locking, indexing, and message management handled automatically. For the imaplib / smtplib alternative — imaplib.IMAP4_SSL connects to a live IMAP server to fetch, search, flag, and move messages remotely; smtplib.SMTP sends new messages — use imaplib when you need to interact with a live mail server (fetch inbox, search by server-side criteria, mark read/deleted, move to folders); use mailbox when you have local mailbox files on disk and don’t need a network connection. The Claude Skills 360 bundle includes mailbox skill sets covering open_mbox()/open_maildir() openers, MessageSummary with summarize_message()/get_text_body(), iter_summaries()/search_messages() iterators, export_to_eml()/convert_mbox_to_maildir()/convert_maildir_to_mbox() export tools, and MailboxStats with compute_stats(). Start with the free tier to try email archive processing patterns and mailbox pipeline code generation.