Python’s mimetypes module maps file extensions to MIME types and vice versa. import mimetypes. guess_type: mime, enc = mimetypes.guess_type("report.pdf") → ("application/pdf", None); mimetypes.guess_type("archive.tar.gz") → ("application/x-tar", "gzip") — enc is the content-encoding. guess_extension: mimetypes.guess_extension("image/png") → ".png" (may vary). guess_all_extensions: mimetypes.guess_all_extensions("text/html") → [".html", ".htm", ".shtml"] (contents and order vary by platform). add_type: mimetypes.add_type("application/x-parquet", ".parquet") — register custom types. types_map: mimetypes.types_map → {".ext": "mime/type"} dict. suffix_map: {".tgz": ".tar.gz", ".tbz2": ".tar.bz2"} — expands shorthand suffixes into their compound form before lookup. encodings_map: {".gz": "gzip", ".bz2": "bzip2"}. MimeTypes class: mt = mimetypes.MimeTypes() — isolated instance; mt.add_type(...) doesn’t affect global state. init: mimetypes.init() — (re)loads from system files (and, on Windows, the registry) plus the built-in table. strict=False: guess_type(url, strict=False) — also recognises non-standard but commonly used MIME types. mimetypes.guess_type accepts full URLs: mimetypes.guess_type("http://example.com/file.json") → ("application/json", None). Claude Code generates HTTP file servers with correct Content-Type headers, upload validators, MIME-based routing tables, and media-type normalisers.
CLAUDE.md for mimetypes
## mimetypes Stack
- Stdlib: import mimetypes
- Detect: mime, enc = mimetypes.guess_type(path_or_url)
- Ext: ext = mimetypes.guess_extension(mime_type)
- Custom: mimetypes.add_type("application/x-custom", ".custom")
- Map: mimetypes.types_map # {".ext": "mime/type"}
- Isolated: mt = mimetypes.MimeTypes(); mt.add_type(m, e)
mimetypes MIME Detection Pipeline
# app/mimetypeutil.py — detect, validate, serve, batch, custom registry
from __future__ import annotations
import mimetypes
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# ─────────────────────────────────────────────────────────────────────────────
# 1. Detection helpers
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class MIMEInfo:
    """
    Result of a filename-based MIME lookup.

    Field order is part of the interface: instances may be built positionally.
    """

    path: str                # the path or URL that was inspected, as a string
    mime_type: str | None    # e.g. "application/pdf"; None when unrecognised
    encoding: str | None     # content-encoding such as "gzip", or None
    is_text: bool            # True when the media type is treated as textual
    is_binary: bool          # the opposite classification to is_text
    main_type: str           # "application", "text", "image", ...
    sub_type: str            # "pdf", "plain", "png", ...
    extensions: list[str]    # known extensions for this type

    def __str__(self) -> str:
        """Render as ``<mime>[ (<encoding>)] [<up to three extensions>]``."""
        shown_exts = ", ".join(self.extensions[:3])
        if self.encoding:
            enc_part = f" ({self.encoding})"
        else:
            enc_part = ""
        return f"{self.mime_type}{enc_part} [{shown_exts}]"
def detect(path: str | Path) -> MIMEInfo:
    """
    Detect the MIME type for a file path or URL from its name alone.

    The lookup is delegated to ``mimetypes.guess_type``; only the name is
    inspected, so the result reflects the extension, not the actual bytes.

    Args:
        path: File path or URL to classify.

    Returns:
        A MIMEInfo. For an unrecognised name ``mime_type`` is None while
        ``main_type`` / ``sub_type`` fall back to "application" /
        "octet-stream". ``is_text`` describes the media type only: a
        gzip-encoded text file still reports ``is_text=True`` with
        ``encoding="gzip"``.

    Example:
        info = detect("report.pdf")
        print(info.mime_type)                 # application/pdf
        info = detect("archive.tar.gz")
        print(info.mime_type, info.encoding)  # application/x-tar gzip
    """
    p = str(path)
    mime, enc = mimetypes.guess_type(p)
    # partition() never raises, so a custom type registered without a "/"
    # yields an empty sub_type instead of a ValueError on unpacking.
    main, _, sub = (mime or "application/octet-stream").partition("/")
    # Besides text/*, treat well-known textual application types and any
    # structured-syntax suffix type (image/svg+xml, application/manifest+json)
    # as text.
    is_text = (
        main == "text"
        or sub in ("json", "xml", "javascript", "csv")
        or sub.endswith(("+json", "+xml"))
    )
    exts = mimetypes.guess_all_extensions(mime) if mime else []
    return MIMEInfo(
        path=p,
        mime_type=mime,
        encoding=enc,
        is_text=is_text,
        is_binary=not is_text,
        main_type=main,
        sub_type=sub,
        extensions=exts,
    )
def content_type_header(path: str | Path, charset: str = "utf-8") -> str:
    """
    Build the Content-Type header value for a file path.

    Types that detect() classifies as text get a ``; charset=...`` suffix;
    unrecognised names are reported as "application/octet-stream".

    Example:
        content_type_header("style.css")  # "text/css; charset=utf-8"
        content_type_header("data.bin")   # "application/octet-stream"
    """
    detected = detect(path)
    base = detected.mime_type or "application/octet-stream"
    return f"{base}; charset={charset}" if detected.is_text else base
def is_safe_upload(
    filename: str,
    allowed_mimes: set[str],
    *,
    allow_encoded: bool = False,
) -> bool:
    """
    Return True if the filename's detected MIME type is in allowed_mimes.

    The check is based on the file name only; it does not inspect the file
    contents, so it is a first-line filter rather than proof of the payload.

    Args:
        filename: Name of the uploaded file.
        allowed_mimes: MIME types that are acceptable.
        allow_encoded: When False (the default), names that carry a
            content-encoding suffix such as ``.gz`` are rejected, because the
            stored bytes are the compressed wrapper rather than the allowed
            type. Pass True to judge such names by their inner type.

    Returns:
        True when the type is recognised, permitted, and (unless
        allow_encoded is set) not wrapped in a content-encoding.

    Example:
        allowed = {"image/jpeg", "image/png", "application/pdf"}
        is_safe_upload("photo.jpg", allowed)      # True
        is_safe_upload("script.exe", allowed)     # False
        is_safe_upload("report.pdf.gz", allowed)  # False (gzip wrapper)
    """
    mime, encoding = mimetypes.guess_type(filename)
    if not mime:
        return False
    if encoding is not None and not allow_encoded:
        return False
    return mime in allowed_mimes
def extension_for(mime_type: str) -> str | None:
    """
    Return the preferred extension for a MIME type, or None if unknown.

    Accepts either a bare type ("image/png") or a full Content-Type value
    with parameters ("image/png; q=0.8"); parameters and surrounding
    whitespace are ignored for the lookup.

    Args:
        mime_type: MIME type, optionally followed by ``;``-separated parameters.

    Returns:
        The extension including its leading dot, as chosen by
        ``mimetypes.guess_extension``, or None when the type is not known.

    Example:
        extension_for("image/png")                        # ".png"
        extension_for("application/json; charset=utf-8")  # ".json"
    """
    # guess_extension() matches the whole string, so drop any parameters first.
    essence = mime_type.split(";", 1)[0].strip()
    return mimetypes.guess_extension(essence)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Batch operations
# ─────────────────────────────────────────────────────────────────────────────
def detect_directory(
    directory: str | Path,
    recursive: bool = False,
) -> dict[str, MIMEInfo]:
    """
    Run detect() on every regular file in a directory.

    With ``recursive=True`` subdirectories are walked as well. Keys of the
    returned mapping are paths relative to ``directory``.

    Example:
        types = detect_directory("/tmp/uploads")
        for path, info in types.items():
            print(f"{path}: {info.mime_type}")
    """
    base = Path(directory)
    pattern = "**/*" if recursive else "*"
    return {
        str(entry.relative_to(base)): detect(entry)
        for entry in base.glob(pattern)
        if entry.is_file()
    }
def group_by_type(paths: list[str | Path]) -> dict[str, list[str]]:
    """
    Bucket paths by the main part of their detected MIME type.

    Keys are main types such as "image", "text" or "application"; values are
    the stringified paths in the order they were supplied.

    Example:
        groups = group_by_type(Path("/tmp").glob("*"))
        print(groups.get("image", []))
    """
    by_main: dict[str, list[str]] = {}
    for item in paths:
        key = detect(item).main_type
        if key not in by_main:
            by_main[key] = []
        by_main[key].append(str(item))
    return by_main
def filter_by_mime(paths: list[str | Path], mime_prefix: str) -> list[str]:
    """
    Keep only the paths whose detected MIME type begins with mime_prefix.

    Paths with no detectable type are compared as the empty string.

    Example:
        images = filter_by_mime(all_files, "image/")
        text_files = filter_by_mime(all_files, "text/")
    """
    matched: list[str] = []
    for candidate in paths:
        mime = detect(candidate).mime_type or ""
        if mime.startswith(mime_prefix):
            matched.append(str(candidate))
    return matched
# ─────────────────────────────────────────────────────────────────────────────
# 3. Custom MIME registry
# ─────────────────────────────────────────────────────────────────────────────
# Well-known modern types not always in the system database
# (mime_type, extension) pairs; each extension carries its leading dot.
# register_modern_types() applies them to the global mimetypes database and
# make_custom_db() applies them to an isolated MimeTypes instance.
# NOTE(review): several values use an "x-" prefixed name — confirm against the
# IANA media-type registry if interoperability with other systems matters.
_CUSTOM_TYPES: list[tuple[str, str]] = [
    # Data interchange formats
    ("application/x-parquet", ".parquet"),
    ("application/x-arrow", ".arrow"),
    ("application/x-ndjson", ".ndjson"),
    ("application/x-jsonlines", ".jsonl"),
    ("application/wasm", ".wasm"),
    # Source and configuration text
    ("text/x-python", ".py"),
    ("text/x-toml", ".toml"),
    # Both YAML spellings map to the same type.
    ("text/x-yaml", ".yml"),
    ("text/x-yaml", ".yaml"),
    # Images
    ("image/webp", ".webp"),
    ("image/avif", ".avif"),
    # Fonts
    ("font/woff", ".woff"),
    ("font/woff2", ".woff2"),
]
def register_modern_types() -> None:
    """
    Add every entry of _CUSTOM_TYPES to the global mimetypes database.

    Intended to be called once at startup.

    Example:
        register_modern_types()
        print(mimetypes.guess_type("data.parquet"))
        # ('application/x-parquet', None)
    """
    for entry in _CUSTOM_TYPES:
        mimetypes.add_type(*entry)
def make_custom_db(
    extra_types: list[tuple[str, str]] | None = None,
) -> mimetypes.MimeTypes:
    """
    Build a separate MimeTypes instance carrying _CUSTOM_TYPES plus extras.

    Registrations go to the returned instance only, not to the module-level
    database. ``extra_types`` entries are (mime_type, extension) pairs and are
    added after the built-in custom set.

    Example:
        db = make_custom_db([("application/x-custom", ".custom")])
        mime, _ = db.guess_type("file.custom")
        print(mime)  # "application/x-custom"
    """
    db = mimetypes.MimeTypes()
    for mime, ext in [*_CUSTOM_TYPES, *(extra_types or [])]:
        db.add_type(mime, ext)
    return db
# ─────────────────────────────────────────────────────────────────────────────
# 4. HTTP response helper
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ServeableFile:
    """
    Metadata needed to serve a file via HTTP (Content-Type, encoding, size).

    Example:
        sf = ServeableFile.from_path(Path("/tmp/report.pdf"))
        print(sf.content_type_header)
    """

    path: Path
    content_type: str        # full header value, charset included for text/*
    encoding: str | None     # "gzip", "br", etc.
    size: int                # size in bytes; 0 when the file cannot be found

    @classmethod
    def from_path(cls, path: str | Path) -> "ServeableFile":
        """
        Build the metadata for ``path`` from its name and on-disk size.

        Args:
            path: Location of the file; a plain string is converted to Path.

        Returns:
            A ServeableFile whose content_type falls back to
            "application/octet-stream" for unrecognised names and whose size
            is 0 when the file does not exist.
        """
        path = Path(path)
        mime, enc = mimetypes.guess_type(str(path))
        ct = mime or "application/octet-stream"
        if ct.startswith("text/"):
            ct += "; charset=utf-8"
        # One stat() call instead of exists() followed by stat(): the file
        # can no longer vanish between the check and the read. The caught
        # errors are the missing/invalid-path cases that map to size 0.
        try:
            size = path.stat().st_size
        except (FileNotFoundError, NotADirectoryError, ValueError):
            size = 0
        return cls(path=path, content_type=ct, encoding=enc, size=size)

    @property
    def content_type_header(self) -> str:
        """Value for the Content-Type response header."""
        return self.content_type

    @property
    def content_encoding_header(self) -> str | None:
        """Value for the Content-Encoding response header, or None."""
        return self.encoding
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import os
    import tempfile

    print("=== mimetypes demo ===")

    # Name-only detection across a spread of common file types.
    print("\n--- detect ---")
    for name in (
        "report.pdf", "style.css", "image.png", "data.json",
        "archive.tar.gz", "script.py", "video.mp4", "font.woff2",
    ):
        info = detect(name)
        print(f" {name:20s} → {info!s}")

    # Header values, with charset added for textual types.
    print("\n--- content_type_header ---")
    for name in ("index.html", "bundle.js", "data.csv", "photo.webp"):
        print(f" {name:20s} → {content_type_header(name)!r}")

    # Allow-list check against a small set of upload types.
    print("\n--- is_safe_upload ---")
    allowed = {"image/jpeg", "image/png", "image/gif", "application/pdf"}
    for name in ("photo.jpg", "malware.exe", "document.pdf", "script.sh"):
        print(f" {name:20s} safe={is_safe_upload(name, allowed)}")

    # Global registration of the extra types, then lookups that rely on it.
    print("\n--- register_modern_types ---")
    register_modern_types()
    for name in ("data.parquet", "model.wasm", "events.jsonl", "config.yaml"):
        guessed, _ = mimetypes.guess_type(name)
        print(f" {name:20s} → {guessed}")

    # Reverse lookup: every known extension for a type.
    print("\n--- guess_all_extensions ---")
    for mime in ("text/html", "image/jpeg", "application/json", "audio/mpeg"):
        exts = mimetypes.guess_all_extensions(mime)
        print(f" {mime:25s} → {exts}")

    # Grouping needs real directory entries, so create throwaway files.
    print("\n--- group_by_type ---")
    with tempfile.TemporaryDirectory() as workdir:
        scratch = Path(workdir)
        for fname in ("a.txt", "b.html", "c.png", "d.pdf", "e.json"):
            (scratch / fname).write_bytes(b"x")
        groups = group_by_type(list(scratch.iterdir()))
        for grp in sorted(groups):
            files = groups[grp]
            print(f" {grp}: {[os.path.basename(f) for f in files]}")

    print("\n=== done ===")
For the python-magic / filetype alternative — python-magic (PyPI) reads the first few bytes of a file and identifies its type from magic numbers (byte signatures) rather than the file extension; filetype does the same in pure Python — use python-magic or filetype for security-critical upload validation where you cannot trust the file extension; use mimetypes when you only have a filename or URL (no file contents), for building HTTP Content-Type headers, or when generating file listings from paths. For the email.mime / http alternative — http.server.SimpleHTTPRequestHandler relies on the mimetypes module internally to choose its Content-Type response header, whereas email.mime.base.MIMEBase takes the main type and subtype as explicit arguments, so code that attaches files typically calls mimetypes.guess_type() itself and passes the result in — either way the MIME type information flows from mimetypes into both the email attachment MIME headers and the HTTP Content-Type response headers; understanding mimetypes directly lets you override or extend that behaviour without subclassing the higher-level classes. The Claude Skills 360 bundle includes mimetypes skill sets covering MIMEInfo dataclass with detect(), content_type_header()/is_safe_upload()/extension_for() detection helpers, detect_directory()/group_by_type()/filter_by_mime() batch tools, register_modern_types()/make_custom_db() custom registry, and ServeableFile HTTP serve metadata. Start with the free tier to try MIME type detection patterns and mimetypes pipeline code generation.