Blog / AI / Claude Code for mimetypes: Python MIME Type Detection

Claude Code for mimetypes: Python MIME Type Detection

Published: September 16, 2028

•

Read time: 5 min read

•

By: Claude Skills 360

Python’s mimetypes module maps file extensions to MIME types and vice versa. import mimetypes. guess_type: mime, enc = mimetypes.guess_type("report.pdf") → ("application/pdf", None); mimetypes.guess_type("archive.tar.gz") → ("application/x-tar", "gzip") — enc is the content-encoding. guess_extension: mimetypes.guess_extension("image/png") → ".png" (may vary). guess_all_extensions: mimetypes.guess_all_extensions("text/html") → [".html", ".htm"] (the exact list depends on the Python version and the system MIME database). add_type: mimetypes.add_type("application/x-parquet", ".parquet") — register custom types. types_map: mimetypes.types_map → {".ext": "mime/type"} dict. suffix_map: maps shorthand suffixes to their expanded form, e.g. {".tgz": ".tar.gz"}. encodings_map: {".gz": "gzip", ".bz2": "bzip2"}. MimeTypes class: mt = mimetypes.MimeTypes() — isolated instance; mt.add_type(...) doesn’t affect global state. init: mimetypes.init() — (re)loads from system files (and, on Windows, the registry) plus the built-in table. strict=False: guess_type(url, strict=False) — also recognises non-standard but commonly used MIME types in addition to the official IANA ones. mimetypes.guess_type accepts full URLs: mimetypes.guess_type("http://example.com/file.json") → ("application/json", None). Claude Code generates HTTP file servers with correct Content-Type headers, upload validators, MIME-based routing tables, and media-type normalisers.

CLAUDE.md for mimetypes

## mimetypes Stack
- Stdlib: import mimetypes
- Detect: mime, enc = mimetypes.guess_type(path_or_url)
- Ext:    ext = mimetypes.guess_extension(mime_type)
- Custom: mimetypes.add_type("application/x-custom", ".custom")
- Map:    mimetypes.types_map  # {".ext": "mime/type"}
- Isolated: mt = mimetypes.MimeTypes(); mt.add_type(m, e)

mimetypes MIME Detection Pipeline

# app/mimetypeutil.py — detect, validate, serve, batch, custom registry
from __future__ import annotations

import mimetypes
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any


# ─────────────────────────────────────────────────────────────────────────────
# 1. Detection helpers
# ─────────────────────────────────────────────────────────────────────────────

@dataclass
class MIMEInfo:
    """Outcome of a name-based MIME lookup for a single path or URL."""

    path:          str          # string form of the path/URL that was looked up
    mime_type:     str | None   # full type, e.g. "application/pdf"; None if unknown
    encoding:      str | None   # content-encoding, e.g. "gzip"; None if absent
    is_text:       bool         # whether the type is treated as textual
    is_binary:     bool         # whether the type is treated as binary
    main_type:     str          # part before the slash: "application", "text", ...
    sub_type:      str          # part after the slash: "pdf", "plain", "png", ...
    extensions:    list[str]    # extensions known for this MIME type

    def __str__(self) -> str:
        """Format as ``<mime>[ (<encoding>)]  [<up to three extensions>]``."""
        encoding_note = ""
        if self.encoding:
            encoding_note = f" ({self.encoding})"
        # Only the first three extensions are shown to keep the line short.
        shown = ", ".join(self.extensions[:3])
        return f"{self.mime_type}{encoding_note}  [{shown}]"


def detect(path: str | Path) -> MIMEInfo:
    """
    Look up the MIME type of a file path or URL from its name.

    The name is passed to mimetypes.guess_type(). When no type is found,
    mime_type is None while main_type/sub_type fall back to
    "application"/"octet-stream" and the extension list is empty.

    Example:
        info = detect("report.pdf")
        print(info.mime_type)   # "application/pdf"
        info = detect("archive.tar.gz")
        print(info.mime_type, info.encoding)   # "application/x-tar", "gzip"
    """
    target = str(path)
    guessed, content_encoding = mimetypes.guess_type(target)

    # Unknown names are split as though they were generic binary data.
    effective = guessed if guessed else "application/octet-stream"
    main_type, sub_type = effective.split("/", 1)

    # Everything under text/* is text; so are a few structured sub-types.
    text_like = main_type == "text"
    if not text_like:
        text_like = sub_type in ("json", "xml", "javascript", "csv")

    known_extensions = mimetypes.guess_all_extensions(guessed) if guessed else []

    return MIMEInfo(
        path=target,
        mime_type=guessed,
        encoding=content_encoding,
        is_text=text_like,
        is_binary=not text_like,
        main_type=main_type,
        sub_type=sub_type,
        extensions=known_extensions,
    )


def content_type_header(path: str | Path, charset: str = "utf-8") -> str:
    """
    Build the Content-Type header value for a file path.

    Unknown types become "application/octet-stream". A "; charset=..."
    parameter is appended whenever detect() flags the type as text.

    Example:
        content_type_header("style.css")   # "text/css; charset=utf-8"
        content_type_header("data.bin")    # "application/octet-stream"
    """
    info = detect(path)
    base_type = info.mime_type if info.mime_type else "application/octet-stream"
    return f"{base_type}; charset={charset}" if info.is_text else base_type


def is_safe_upload(filename: str, allowed_mimes: set[str]) -> bool:
    """
    Return True if the filename's detected MIME type is in allowed_mimes.

    The check looks at the file *name* only; the file contents are never
    inspected. Names for which mimetypes reports a content-encoding
    (e.g. "report.pdf.gz" -> "application/pdf" + "gzip") are rejected,
    because the uploaded bytes are a compressed stream rather than the
    allowed type itself.

    Args:
        filename: Name (or path/URL) of the uploaded file.
        allowed_mimes: MIME types that are acceptable, e.g. {"image/png"}.

    Returns:
        True only when a MIME type is detected, no content-encoding is
        detected, and the type is a member of allowed_mimes.

    Example:
        allowed = {"image/jpeg", "image/png", "application/pdf"}
        is_safe_upload("photo.jpg", allowed)      # True
        is_safe_upload("script.exe", allowed)     # False
        is_safe_upload("report.pdf.gz", allowed)  # False (gzip-encoded)
    """
    mime, encoding = mimetypes.guess_type(filename)
    # Unknown type, or a compression wrapper around the type: not acceptable.
    if not mime or encoding:
        return False
    return mime in allowed_mimes


def extension_for(mime_type: str) -> str | None:
    """
    Return the canonical extension for a MIME type, or None.

    Accepts either a bare type ("image/png") or a full Content-Type value
    with parameters ("application/pdf; charset=binary"); anything from the
    first ";" onward and any surrounding whitespace is ignored before the
    lookup.

    Args:
        mime_type: MIME type or Content-Type header value.

    Returns:
        The extension (with leading dot) chosen by
        mimetypes.guess_extension(), or None when the type is unknown.

    Example:
        extension_for("image/png")                 # ".png"
        extension_for("application/json")          # ".json"
        extension_for("application/pdf; q=0.9")    # ".pdf"
    """
    # Drop parameters such as "; charset=utf-8" — the lookup table is keyed
    # on the bare type only.
    bare_type = mime_type.partition(";")[0].strip()
    return mimetypes.guess_extension(bare_type)


# ─────────────────────────────────────────────────────────────────────────────
# 2. Batch operations
# ─────────────────────────────────────────────────────────────────────────────

def detect_directory(
    directory: str | Path,
    recursive: bool = False,
) -> dict[str, MIMEInfo]:
    """
    Run detect() on every regular file found in a directory.

    With recursive=True the search descends into sub-directories.
    Keys of the returned dict are paths relative to `directory`;
    values are the corresponding MIMEInfo objects.

    Example:
        types = detect_directory("/tmp/uploads")
        for path, info in types.items():
            print(f"{path}: {info.mime_type}")
    """
    base = Path(directory)
    pattern = "**/*" if recursive else "*"
    return {
        str(entry.relative_to(base)): detect(entry)
        for entry in base.glob(pattern)
        if entry.is_file()  # directories and other non-files are skipped
    }


def group_by_type(paths: list[str | Path]) -> dict[str, list[str]]:
    """
    Bucket file paths by the main part of their detected MIME type
    (e.g. "image", "text", "application").

    Each path is stored in its bucket as a string, in input order.

    Example:
        groups = group_by_type(Path("/tmp").glob("*"))
        print(groups.get("image", []))
    """
    grouped: dict[str, list[str]] = {}
    for candidate in paths:
        bucket = detect(candidate).main_type
        if bucket not in grouped:
            grouped[bucket] = []
        grouped[bucket].append(str(candidate))
    return grouped


def filter_by_mime(paths: list[str | Path], mime_prefix: str) -> list[str]:
    """
    Keep only the paths whose detected MIME type begins with mime_prefix.

    Paths with no detected type are compared as the empty string. The
    result holds the string form of each kept path, in input order.

    Example:
        images = filter_by_mime(all_files, "image/")
        text_files = filter_by_mime(all_files, "text/")
    """
    kept: list[str] = []
    for candidate in paths:
        detected = detect(candidate).mime_type
        comparable = detected if detected else ""
        if comparable.startswith(mime_prefix):
            kept.append(str(candidate))
    return kept


# ─────────────────────────────────────────────────────────────────────────────
# 3. Custom MIME registry
# ─────────────────────────────────────────────────────────────────────────────

# Well-known modern types not always in the system database
# Each entry is a (mime_type, extension) pair. The functions in this section
# iterate the table in order and pass each pair to add_type(mime, ext).
_CUSTOM_TYPES: list[tuple[str, str]] = [
    # Data / serialization formats
    ("application/x-parquet",    ".parquet"),
    ("application/x-arrow",      ".arrow"),
    ("application/x-ndjson",     ".ndjson"),
    ("application/x-jsonlines",  ".jsonl"),
    # WebAssembly
    ("application/wasm",         ".wasm"),
    # Source and configuration text (two extensions share text/x-yaml)
    ("text/x-python",            ".py"),
    ("text/x-toml",              ".toml"),
    ("text/x-yaml",              ".yml"),
    ("text/x-yaml",              ".yaml"),
    # Images
    ("image/webp",               ".webp"),
    ("image/avif",               ".avif"),
    # Web fonts
    ("font/woff",                ".woff"),
    ("font/woff2",               ".woff2"),
]


def register_modern_types() -> None:
    """
    Add every (mime, extension) pair from _CUSTOM_TYPES to the global
    mimetypes database. Intended to be called once at startup.

    Example:
        register_modern_types()
        print(mimetypes.guess_type("data.parquet"))   # ("application/x-parquet", None)
    """
    for pair in _CUSTOM_TYPES:
        mime_type, extension = pair
        mimetypes.add_type(mime_type, extension)


def make_custom_db(
    extra_types: list[tuple[str, str]] | None = None,
) -> mimetypes.MimeTypes:
    """
    Build a separate MimeTypes instance holding the _CUSTOM_TYPES entries
    followed by any caller-supplied (mime, extension) pairs.

    All registrations go to the new instance, not to the module-level
    mimetypes functions.

    Example:
        db = make_custom_db([("application/x-custom", ".custom")])
        mime, _ = db.guess_type("file.custom")
        print(mime)   # "application/x-custom"
    """
    database = mimetypes.MimeTypes()
    # Table entries first, then the caller's extras, so extras are added last.
    for mime_type, extension in [*_CUSTOM_TYPES, *(extra_types or [])]:
        database.add_type(mime_type, extension)
    return database


# ─────────────────────────────────────────────────────────────────────────────
# 4. HTTP response helper
# ─────────────────────────────────────────────────────────────────────────────

@dataclass
class ServeableFile:
    """
    Metadata needed to serve a file via HTTP (Content-Type, encoding, size).

    Example:
        sf = ServeableFile.from_path(Path("/tmp/report.pdf"))
        print(sf.content_type_header)
    """
    path:          Path
    content_type:  str         # full header value, charset included for text/*
    encoding:      str | None  # "gzip", "br", etc.
    size:          int         # bytes on disk; 0 when the file cannot be stat'ed

    @classmethod
    def from_path(cls, path: str | Path) -> "ServeableFile":
        """
        Build the serving metadata for `path` from its name and on-disk size.

        Args:
            path: File location, as a Path or a plain string (strings are
                converted to Path, matching the other helpers in this module).

        Returns:
            A ServeableFile whose content_type falls back to
            "application/octet-stream" for unknown names, gets
            "; charset=utf-8" appended for text/* types only, and whose size
            is 0 if the file is missing or cannot be stat'ed.
        """
        if not isinstance(path, Path):
            path = Path(path)
        mime, enc = mimetypes.guess_type(str(path))
        ct = mime or "application/octet-stream"
        if ct.startswith("text/"):
            ct += "; charset=utf-8"
        # Stat directly instead of exists()-then-stat(): the file may vanish
        # between the two calls, which would turn a "size 0" case into a crash.
        try:
            size = path.stat().st_size
        except (OSError, ValueError):
            size = 0
        return cls(
            path=path,
            content_type=ct,
            encoding=enc,
            size=size,
        )

    @property
    def content_type_header(self) -> str:
        """Value to send as the Content-Type response header."""
        return self.content_type

    @property
    def content_encoding_header(self) -> str | None:
        """Value to send as Content-Encoding, or None when there is none."""
        return self.encoding


# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    import os
    import tempfile

    print("=== mimetypes demo ===")

    # detect(): one summary line per sample file name.
    print("\n--- detect ---")
    sample_names = (
        "report.pdf", "style.css", "image.png", "data.json",
        "archive.tar.gz", "script.py", "video.mp4", "font.woff2",
    )
    for sample in sample_names:
        print(f"  {sample:20s} → {detect(sample)!s}")

    # content_type_header(): full header values.
    print("\n--- content_type_header ---")
    for sample in ("index.html", "bundle.js", "data.csv", "photo.webp"):
        header_value = content_type_header(sample)
        print(f"  {sample:20s} → {header_value!r}")

    # is_safe_upload(): allow-list check by file name.
    print("\n--- is_safe_upload ---")
    allowed = {"image/jpeg", "image/png", "image/gif", "application/pdf"}
    for sample in ("photo.jpg", "malware.exe", "document.pdf", "script.sh"):
        verdict = is_safe_upload(sample, allowed)
        print(f"  {sample:20s} safe={verdict}")

    # register_modern_types(): must run before the lookups that follow it.
    print("\n--- register_modern_types ---")
    register_modern_types()
    for sample in ("data.parquet", "model.wasm", "events.jsonl", "config.yaml"):
        guessed = mimetypes.guess_type(sample)[0]
        print(f"  {sample:20s} → {guessed}")

    # guess_all_extensions(): every extension known for a type.
    print("\n--- guess_all_extensions ---")
    for mime_name in ("text/html", "image/jpeg", "application/json", "audio/mpeg"):
        known = mimetypes.guess_all_extensions(mime_name)
        print(f"  {mime_name:25s} → {known}")

    # group_by_type(): exercised on real files inside a throwaway directory.
    print("\n--- group_by_type ---")
    with tempfile.TemporaryDirectory() as workdir:
        workdir_path = Path(workdir)
        for fname in ("a.txt", "b.html", "c.png", "d.pdf", "e.json"):
            (workdir_path / fname).write_bytes(b"x")
        grouped = group_by_type(list(workdir_path.iterdir()))
        for kind in sorted(grouped):
            basenames = [os.path.basename(entry) for entry in grouped[kind]]
            print(f"  {kind}: {basenames}")

    print("\n=== done ===")

For the python-magic / filetype alternative — python-magic (PyPI) reads the first few bytes of a file and identifies its type from magic numbers (byte signatures) rather than file extension; filetype does the same in pure Python — use python-magic or filetype for security-critical upload validation where you cannot trust the file extension; use mimetypes when you only have a filename or URL (no file contents), for building HTTP Content-Type headers, or when generating file listings from paths. For the email.mime / http alternative — http.server.SimpleHTTPRequestHandler calls mimetypes.guess_type() internally, while email.mime.base.MIMEBase takes an explicit maintype/subtype that attachment code usually obtains from mimetypes.guess_type() — so the MIME type information flows from mimetypes into both the email attachment MIME headers and the HTTP Content-Type response headers; understanding mimetypes directly lets you override or extend that behaviour without subclassing the higher-level classes. The Claude Skills 360 bundle includes mimetypes skill sets covering MIMEInfo dataclass with detect(), content_type_header()/is_safe_upload()/extension_for() detection helpers, detect_directory()/group_by_type()/filter_by_mime() batch tools, register_modern_types()/make_custom_db() custom registry, and ServeableFile HTTP serve metadata. Start with the free tier to try MIME type detection patterns and mimetypes pipeline code generation.

Keep Reading

Claude Code for email.contentmanager: Python Email Content Accessors

Read and write EmailMessage body content with Python's email.contentmanager module and Claude Code — email contentmanager ContentManager for the class that maps content types to get and set handler functions allowing EmailMessage to support get_content and set_content with type-specific behaviour, email contentmanager raw_data_manager for the ContentManager instance that handles raw bytes and str payloads without any conversion, email contentmanager content_manager for the standard ContentManager instance used by email.policy.default that intelligently handles text plain text html multipart and binary content types, email contentmanager get_content_text for the handler that returns the decoded text payload of a text-star message part as a str, email contentmanager get_content_binary for the handler that returns the raw decoded bytes payload of a non-text message part, email contentmanager get_data_manager for the get-handler lookup used by EmailMessage get_content to find the right reader function for the content type, email contentmanager set_content text for the handler that creates and sets a text part correctly choosing charset and transfer encoding, email contentmanager set_content bytes for the handler that creates and sets a binary part with base64 encoding and optional filename Content-Disposition, email contentmanager EmailMessage get_content for the method that reads the message body using the registered content manager handlers, email contentmanager EmailMessage set_content for the method that sets the message body and MIME headers in one call, email contentmanager EmailMessage make_alternative make_mixed make_related for the methods that convert a simple message into a multipart container, email contentmanager EmailMessage add_attachment for the method that attaches a file or bytes to a multipart message, and email contentmanager integration with email.message and email.policy and email.mime and io for building high-level email readers attachment 
extractors text body accessors HTML readers and policy-aware MIME construction pipelines.

5 min read Feb 12, 2029

Claude Code for email.charset: Python Email Charset Encoding

Control header and body encoding for international email with Python's email.charset module and Claude Code — email charset Charset for the class that wraps a character set name with the encoding rules for header encoding and body encoding describing how to encode text for that charset in email messages, email charset Charset header_encoding for the attribute specifying whether headers using this charset should use QP quoted-printable encoding BASE64 encoding or no encoding, email charset Charset body_encoding for the attribute specifying the Content-Transfer-Encoding to use for message bodies in this charset such as QP or BASE64, email charset Charset output_codec for the attribute giving the Python codec name used to encode the string to bytes for the wire format, email charset Charset input_codec for the attribute giving the Python codec name used to decode incoming bytes to str, email charset Charset get_output_charset for returning the output charset name, email charset Charset header_encode for encoding a header string using the charset's header_encoding method, email charset Charset body_encode for encoding body content using the charset's body_encoding, email charset Charset convert for converting a string from the input_codec to the output_codec, email charset add_charset for registering a new charset with custom encoding rules in the global charset registry, email charset add_alias for adding an alias name that maps to an existing registered charset, email charset add_codec for registering a codec name mapping for use by the charset machinery, and email charset integration with email.message and email.mime and email.policy and email.encoders for building international email senders non-ASCII header encoders Content-Transfer-Encoding selectors charset-aware message constructors and MIME encoding pipelines.

5 min read Feb 11, 2029

Claude Code for email.utils: Python Email Address and Header Utilities

Parse and format RFC 2822 email addresses and dates with Python's email.utils module and Claude Code — email utils parseaddr for splitting a display-name plus angle-bracket address string into a realname and email address tuple, email utils formataddr for combining a realname and address string into a properly quoted RFC 2822 address with angle brackets, email utils getaddresses for parsing a list of raw address header strings each potentially containing multiple comma-separated addresses into a list of realname address tuples, email utils parsedate for parsing an RFC 2822 date string into a nine-tuple compatible with time.mktime, email utils parsedate_tz for parsing an RFC 2822 date string into a ten-tuple that includes the UTC offset timezone in seconds, email utils parsedate_to_datetime for parsing an RFC 2822 date string into an aware datetime object with timezone, email utils formatdate for formatting a POSIX timestamp or the current time as an RFC 2822 date string with optional usegmt and localtime flags, email utils format_datetime for formatting a datetime object as an RFC 2822 date string, email utils make_msgid for generating a globally unique Message-ID string with optional idstring and domain components, email utils decode_rfc2231 for decoding an RFC 2231 encoded parameter value into a tuple of charset language and value, email utils encode_rfc2231 for encoding a string as an RFC 2231 encoded parameter value, email utils collapse_rfc2231_value for collapsing a decoded RFC 2231 tuple to a Unicode string, and email utils integration with email.message and email.headerregistry and datetime and time for building address parsers date formatters message-id generators header extractors and RFC-compliant email construction utilities.

5 min read Feb 10, 2029

Put these ideas into practice

Claude Skills 360 gives you production-ready skills for everything in this article — and 2,350+ more. Start free or go all-in.

Get 360 skills free

Free $39