Python’s imghdr module identifies image file types by reading the first few bytes of a file — the “magic bytes” that mark each format. Import it with import imghdr. The main entry point is imghdr.what: imghdr.what("photo.jpg") → "jpeg" (or "png", "gif", "tiff", "bmp", "webp", "exr" (OpenEXR), "rgb" (SGI), "xbm", "pbm", "pgm", "ppm", "rast" (Sun raster)); it returns None for unknown data. It accepts a file path: imghdr.what("path/to/file.png"); a file-like object: imghdr.what(file_obj); or a bytes header: imghdr.what(None, h=first_bytes). imghdr.tests is the list of test functions; append a custom (h, f) -> str | None function to support additional formats. Detection is heuristic — it reads at most 32 bytes. The module was deprecated in Python 3.11 and removed in 3.13 — for new code use the filetype package (PyPI) or PIL.Image.open(). Format strings returned by imghdr itself: "png", "jpeg", "gif", "tiff", "bmp", "webp", "exr", "rgb" (SGI), "xbm", "pbm", "pgm", "ppm", "rast"; Radiance HDR ("rgbe") is not recognized by imghdr, but the magic-byte fallback below adds it. Claude Code generates file upload validators, image pipeline guards, format migrators, and magic-byte inspectors.
CLAUDE.md for imghdr
## imghdr Stack
- Stdlib: import imghdr
- File: imghdr.what("photo.jpg") # "jpeg" or None
- Bytes: imghdr.what(None, h=header_bytes) # works without a file
- Object: imghdr.what(open("f.png","rb")) # file-like object
- Custom: imghdr.tests.append(my_test_fn) # extend for new formats
- Note: Deprecated 3.11, removed 3.13 — use filetype or PIL for new code
imghdr Image Detection Pipeline
# app/imghdrutil.py — detect, validate, scan, categorize, magic-byte fallback
from __future__ import annotations
import io
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# imghdr was removed in Python 3.13; provide a graceful fallback shim
try:
import imghdr as _imghdr
_IMGHDR_AVAILABLE = True
except ImportError:
_imghdr = None # type: ignore[assignment]
_IMGHDR_AVAILABLE = False
# ─────────────────────────────────────────────────────────────────────────────
# 0. Magic-byte fallback (covers the same formats as imghdr)
# ─────────────────────────────────────────────────────────────────────────────
# Signature table for the pure-Python fallback: (magic bytes, offset, format).
# Only image formats belong here. Every format string the detector returns is
# treated by callers as "this is an image", so container/video signatures
# (e.g. Matroska/EBML) must not be listed.
_MAGIC: list[tuple[bytes, int, str]] = [
    (b"\x89PNG\r\n\x1a\n", 0, "png"),
    (b"\xff\xd8\xff", 0, "jpeg"),
    (b"GIF87a", 0, "gif"),
    (b"GIF89a", 0, "gif"),
    (b"II\x2a\x00", 0, "tiff"),
    (b"MM\x00\x2a", 0, "tiff"),
    (b"BM", 0, "bmp"),
    (b"RIFF", 0, "webp"),  # generic RIFF; confirmed by bytes 8-12 == "WEBP"
    (b"#?RADIANCE\n", 0, "rgbe"),
    (b"#?RGBE\n", 0, "rgbe"),
    (b"\x01\xda", 0, "rgb"),
    (b"\x59\xa6\x6a\x95", 0, "rast"),
    (b"\x76\x2f\x31\x01", 0, "exr"),  # OpenEXR, also recognised by imghdr
    (b"P1", 0, "pbm"),
    (b"P4", 0, "pbm"),
    (b"P2", 0, "pgm"),
    (b"P5", 0, "pgm"),
    (b"P3", 0, "ppm"),
    (b"P6", 0, "ppm"),
    (b"#define ", 0, "xbm"),
]
_WEBP_MAGIC = b"WEBP"

# Netpbm magic numbers are only two ASCII characters, so (like imghdr) we also
# require the byte that follows to be whitespace; otherwise any text that
# happens to start with "P1".."P6" would be reported as an image.
_NETPBM_FORMATS = frozenset({"pbm", "pgm", "ppm"})
_NETPBM_SEPARATORS = b" \t\n\r"


def _detect_from_bytes(header: bytes) -> str | None:
    """Detect an image type from leading magic bytes (pure-Python fallback).

    Args:
        header: The first bytes of the data (32 bytes is enough for every
            signature in ``_MAGIC``). May be empty.

    Returns:
        The format string of the first matching signature, or ``None`` when
        nothing matches. Non-image data is never reported.
    """
    for magic, offset, fmt in _MAGIC:
        end = offset + len(magic)
        if header[offset:end] != magic:
            continue
        if fmt == "webp" and header[8:12] != _WEBP_MAGIC:
            # Some other RIFF container (WAV, AVI, ...): keep scanning.
            continue
        if fmt in _NETPBM_FORMATS:
            separator = header[end:end + 1]
            # An empty slice is "in" any bytes object, so check it explicitly.
            if not separator or separator not in _NETPBM_SEPARATORS:
                continue
        return fmt
    return None
def detect_type(
    source: str | os.PathLike[str] | Path | bytes | bytearray | memoryview | io.IOBase,
    use_imghdr: bool = True,
) -> str | None:
    """
    Detect image type from a file path, bytes, or file-like object.

    Returns a type string ("jpeg", "png", etc.) or None.
    Falls back to magic-byte detection if imghdr is unavailable (Python 3.13+)
    or if ``use_imghdr`` is False.

    Args:
        source: A path (``str`` or any ``os.PathLike``), a bytes-like header
            (``bytes``, ``bytearray`` or ``memoryview``), or a binary
            file-like object with a ``read`` method.
        use_imghdr: Use the stdlib ``imghdr`` module when it is importable.

    Raises:
        OSError: If ``source`` is a path that cannot be opened or read.

    Example:
        fmt = detect_type("photo.jpg")          # "jpeg"
        fmt = detect_type(b"\\x89PNG\\r\\n...")   # "png"
    """
    prefer_imghdr = _IMGHDR_AVAILABLE and use_imghdr

    # --- paths -------------------------------------------------------------
    if isinstance(source, (str, os.PathLike)):
        path = os.fspath(source)
        if prefer_imghdr:
            return _imghdr.what(path)
        with open(path, "rb") as f:
            return _detect_from_bytes(f.read(32))

    # --- in-memory data ------------------------------------------------------
    # bytearray/memoryview must be handled here; otherwise they would be
    # mistaken for file-like objects and fail on .tell()/.read().
    if isinstance(source, (bytes, bytearray, memoryview)):
        data = source if isinstance(source, bytes) else bytes(source[:32])
        if prefer_imghdr:
            return _imghdr.what(None, h=data)
        return _detect_from_bytes(data[:32])

    # --- file-like objects ---------------------------------------------------
    # Read the header ourselves so both back ends behave the same way.
    # Streams that report seekable() == False (pipes, sockets) are read
    # without the tell()/seek() round trip, which would raise on them.
    seekable = getattr(source, "seekable", None)
    rewindable = bool(seekable()) if callable(seekable) else True
    pos = source.tell() if rewindable and hasattr(source, "tell") else 0
    header = source.read(32)
    if rewindable and hasattr(source, "seek"):
        source.seek(pos)  # leave the stream where the caller had it

    if prefer_imghdr:
        return _imghdr.what(None, h=header)
    return _detect_from_bytes(header)
# ─────────────────────────────────────────────────────────────────────────────
# 1. Validation helpers
# ─────────────────────────────────────────────────────────────────────────────
# Canonical MIME types for each imghdr format string
# One row per supported format string: (format, MIME type, file extensions).
# Both lookup dicts below are derived from this table so they cannot drift
# apart; row order is the iteration order of both dicts.
_FORMAT_TABLE: tuple[tuple[str, str, tuple[str, ...]], ...] = (
    ("jpeg", "image/jpeg", (".jpg", ".jpeg", ".jpe")),
    ("png", "image/png", (".png",)),
    ("gif", "image/gif", (".gif",)),
    ("tiff", "image/tiff", (".tif", ".tiff")),
    ("bmp", "image/bmp", (".bmp",)),
    ("webp", "image/webp", (".webp",)),
    ("rgb", "image/sgi", (".rgb", ".sgi")),
    ("rgbe", "image/vnd.radiance", (".hdr", ".rgbe")),
    ("xbm", "image/x-xbitmap", (".xbm",)),
    ("pbm", "image/x-portable-bitmap", (".pbm",)),
    ("pgm", "image/x-portable-graymap", (".pgm",)),
    ("ppm", "image/x-portable-pixmap", (".ppm",)),
    ("rast", "image/x-sun-raster", (".rast", ".sun")),
)

# format string -> MIME type
_FORMAT_TO_MIME: dict[str, str] = {
    name: mime for name, mime, _exts in _FORMAT_TABLE
}

# format string -> lower-case extensions (with leading dot) used for it
_FORMAT_TO_EXT: dict[str, list[str]] = {
    name: list(exts) for name, _mime, exts in _FORMAT_TABLE
}
def is_image(source: str | Path | bytes) -> bool:
    """
    Tell whether ``source`` is detected as one of the known image formats.

    ``source`` is passed unchanged to ``detect_type``; the result is True
    exactly when that call yields a format string rather than None.

    Example:
        if not is_image(uploaded_bytes):
            raise ValueError("Not an image")
    """
    detected = detect_type(source)
    return detected is not None
def validate_image_format(
    source: str | Path | bytes,
    allowed: list[str] | None = None,
) -> tuple[bool, str | None]:
    """
    Check that source is a recognized image type, optionally in an allowed set.

    Args:
        source: Path or bytes, passed unchanged to ``detect_type``.
        allowed: Format strings to accept, e.g. ["jpeg", "png", "webp"].
            ``None`` (the default) accepts every recognized format. An
            empty collection accepts nothing: an allow-list that ends up
            empty (for example, loaded from configuration) must fail closed
            rather than silently allow every format.

    Returns:
        (is_valid, detected_format_or_None).

    Example:
        ok, fmt = validate_image_format(data, allowed=["jpeg", "png"])
        if not ok:
            print(f"Rejected: {fmt!r} not in allowed set")
    """
    fmt = detect_type(source)
    if fmt is None:
        return False, None
    # Compare against None, not truthiness, so that [] means "allow nothing".
    if allowed is not None and fmt not in allowed:
        return False, fmt
    return True, fmt
def get_mime_type(source: str | Path | bytes) -> str | None:
    """
    Look up the MIME type that corresponds to the detected image format.

    Returns None when no format is detected, or when the detected format has
    no entry in ``_FORMAT_TO_MIME``.

    Example:
        mime = get_mime_type("photo.jpg")   # "image/jpeg"
    """
    detected = detect_type(source)
    if not detected:
        return None
    return _FORMAT_TO_MIME.get(detected)
def extension_matches_content(path: str | Path) -> tuple[bool, str | None, str | None]:
    """
    Compare the format implied by a file's extension with its detected content.

    Returns (matches, detected_type, declared_type_by_extension). ``matches``
    is True only when both types are known and equal; an unknown extension or
    undetectable content always yields False.

    Example:
        ok, detected, declared = extension_matches_content("image.png")
        if not ok:
            print(f"Extension mismatch: file is {detected!r}, not {declared!r}")
    """
    suffix = Path(path).suffix.lower()
    # First format whose extension list contains the suffix, else None.
    declared: str | None = next(
        (name for name, known in _FORMAT_TO_EXT.items() if suffix in known),
        None,
    )
    detected = detect_type(path)
    if detected is None or declared is None:
        return False, detected, declared
    return detected == declared, detected, declared
# ─────────────────────────────────────────────────────────────────────────────
# 2. Batch scanner
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ImageFile:
    # Location of the file (scan_directory stores it relative to the scan root).
    path: Path
    # Detected format string, or None when the content was not recognised.
    fmt: str | None
    # MIME type for ``fmt``, or None when unknown.
    mime: str | None
    # File size in bytes.
    size: int
    # True when the extension agrees with the detected content.
    ext_match: bool

    def __str__(self) -> str:
        # One report line: match marker, padded format, padded size, path.
        marker = "✓" if self.ext_match else "✗"
        label = self.fmt or "unknown"
        return "{} {:6s} {:8,d}B {}".format(marker, label, self.size, self.path)
def scan_directory(
    directory: str | Path,
    extensions: list[str] | None = None,
    recursive: bool = True,
) -> list[ImageFile]:
    """
    Scan a directory for image files and classify each one.

    Args:
        directory: Root directory to scan.
        extensions: If given (and non-empty), only files with these
            extensions are scanned. Matching is case-insensitive and the
            leading dot is optional, so ".jpg", "JPG" and ".JPG" are
            equivalent. Files that pass the filter are reported even when
            their content is not a recognized image (``fmt`` is None).
            Without a filter, unrecognized files are skipped.
        recursive: Descend into sub-directories when True.

    Returns:
        A list of ImageFile sorted by path; each ``path`` is relative to
        ``directory``. Files that cannot be read (OSError) are skipped.

    Example:
        for img in scan_directory("uploads/", extensions=[".jpg", ".png"]):
            if not img.ext_match:
                print(f"  mismatch: {img}")
    """
    root = Path(directory)
    pattern = "**/*" if recursive else "*"

    # Normalise the filter once, outside the loop.
    wanted: set[str] | None = None
    if extensions:
        wanted = {
            ext.lower() if ext.startswith(".") else f".{ext.lower()}"
            for ext in extensions
        }

    results: list[ImageFile] = []
    for p in sorted(root.glob(pattern)):
        if not p.is_file():
            continue
        if wanted is not None and p.suffix.lower() not in wanted:
            continue
        try:
            # A single call yields both the detected format and the
            # extension verdict, so each file's header is read only once.
            ext_ok, fmt, _declared = extension_matches_content(p)
            if fmt is None and wanted is None:
                continue  # skip non-images when no extension filter
            size = p.stat().st_size
        except OSError:
            continue  # best effort: unreadable files are left out
        results.append(
            ImageFile(
                path=p.relative_to(root),
                fmt=fmt,
                mime=_FORMAT_TO_MIME.get(fmt) if fmt else None,
                size=size,
                ext_match=ext_ok,
            )
        )
    return results
# ─────────────────────────────────────────────────────────────────────────────
# 3. Format summary
# ─────────────────────────────────────────────────────────────────────────────
def format_summary(images: list[ImageFile]) -> dict[str, int]:
    """
    Tally images per format and return {format_string: count}.

    Entries whose ``fmt`` is None (or empty) are counted under "unknown".
    Keys appear in the order each format is first encountered.

    Example:
        summary = format_summary(scan_directory("uploads/"))
        for fmt, count in sorted(summary.items()):
            print(f"  {fmt}: {count}")
    """
    tally: dict[str, int] = {}
    for entry in images:
        label = entry.fmt if entry.fmt else "unknown"
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1
    return tally
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-contained demo: exercises the helpers above on synthetic headers
    # and on a temporary directory. Nothing outside the temp dir is touched.
    import tempfile

    print("=== imghdr demo ===")
    print(f" imghdr available: {_IMGHDR_AVAILABLE}")

    # Minimal headers: just the magic bytes of each format, zero-padded to
    # 32 bytes. These are not decodable images.
    test_headers: dict[str, bytes] = {
        "png": b"\x89PNG\r\n\x1a\n" + b"\x00" * 24,
        "jpeg": b"\xff\xd8\xff\xe0" + b"\x00" * 28,
        "gif": b"GIF89a" + b"\x00" * 26,
        "bmp": b"BM" + b"\x00" * 30,
        "tiff_le": b"II\x2a\x00" + b"\x00" * 28,
        "tiff_be": b"MM\x00\x2a" + b"\x00" * 28,
        "webp": b"RIFF\x00\x00\x00\x00WEBP" + b"\x00" * 20,
    }

    # detect_type from bytes
    print("\n--- detect_type from bytes ---")
    for name, header in test_headers.items():
        fmt = detect_type(header)
        print(f" {name:10s}: {fmt!r}")

    # get_mime_type (first four headers only)
    print("\n--- get_mime_type ---")
    for header in list(test_headers.values())[:4]:
        print(f" {detect_type(header)!r:6s}: {get_mime_type(header)!r}")

    # validate_image_format
    print("\n--- validate_image_format (allowed=jpeg,png) ---")
    for name, header in test_headers.items():
        ok, fmt = validate_image_format(header, allowed=["jpeg", "png"])
        print(f" {name:10s}: ok={ok} fmt={fmt!r}")

    # scan_directory with temp files
    print("\n--- scan_directory ---")
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_root = Path(tmpdir)
        # One file per header; formats without a mapping here get ".bin",
        # which deliberately produces extension mismatches.
        for name, header in test_headers.items():
            ext = ".png" if "png" in name else ".jpg" if "jpeg" in name else ".gif" if "gif" in name else ".bin"
            (tmp_root / f"test_{name}{ext}").write_bytes(header)
        # Add a non-image file
        (tmp_root / "readme.txt").write_bytes(b"Hello world")

        images = scan_directory(tmpdir, recursive=False)
        for img in images:
            print(f" {img}")
        summary = format_summary(images)
        print(f"\n format summary: {summary}")

    print("\n=== done ===")
For the filetype alternative — filetype (PyPI) detects over 80 file types including images, video, audio, archives, and documents using magic bytes, with no external binary dependencies — use filetype for new code on Python 3.13+, where imghdr has been removed (and on 3.11–3.12, where it is deprecated), especially when you need to detect more than just image formats or want the simple filetype.is_image() and filetype.guess() convenience functions. For the PIL.Image / Pillow alternative — PIL.Image.open(path).format parses the image file and returns format strings like "JPEG", "PNG", "WEBP"; it also validates file integrity beyond magic bytes — use Pillow when you need to be certain the file is not just magic-byte-faked (Pillow will raise an exception on truncated or corrupt data), when you need image dimensions, mode, or EXIF metadata alongside the format, or when you’re already processing the image; use imghdr or filetype for lightweight header-only type detection without the overhead of a full image decode. The Claude Skills 360 bundle includes imghdr skill sets covering detect_type() with imghdr + magic-byte fallback (Python 3.13 safe), is_image()/validate_image_format()/get_mime_type()/extension_matches_content() validators, ImageFile dataclass with scan_directory() batch scanner, and format_summary() statistics. Start with the free tier to try image format detection patterns and imghdr pipeline code generation.