filetype detects file types from magic bytes — no extension or MIME header needed. pip install filetype. guess: import filetype; kind = filetype.guess("file.unknown"); kind.mime → "image/png"; kind.extension → "png". guess from bytes: kind = filetype.guess(open("f","rb").read(262)). guess_mime: filetype.guess_mime(buf) → "image/jpeg" or None. guess_extension: filetype.guess_extension(buf) → "jpg" or None. is_image: filetype.is_image("photo.png") → True. is_video: filetype.is_video(buf). is_audio: filetype.is_audio(buf). is_archive: filetype.is_archive("file.zip"). is_document: filetype.is_document(buf). Supported: JPEG, PNG, GIF, WEBP, BMP, TIFF, HEIC, MP4, MKV, MOV, AVI, MP3, OGG, FLAC, WAV, ZIP, GZIP, BZIP2, 7Z, TAR, RAR, ZSTD, PDF, DOCX, XLSX, PPTX, and many more. Types: filetype.image_matchers, filetype.video_matchers, etc. Custom matcher: subclass filetype.Type, set MIME and EXTENSION, implement match(buf) -> bool. Register: filetype.add_type(MyCustomType()). Read minimum bytes: only 262 bytes needed, no full file read. Async-safe: no I/O inside guess(), pass bytes. Claude Code generates filetype upload validators, MIME routers, archive detectors, and forensic analysis tools.
## CLAUDE.md for filetype
## filetype Stack
- Version: filetype >= 1.2 | pip install filetype
- Guess: kind = filetype.guess(path_or_bytes) | kind.mime | kind.extension
- Check: filetype.is_image/is_video/is_audio/is_archive/is_document(src)
- MIME: filetype.guess_mime(src) → str | None
- Ext: filetype.guess_extension(src) → str | None
- Only 262 bytes needed: pass file[:262] for performance with large files
## filetype Detection Pipeline
# app/filetype_utils.py — filetype detection, validation, routing, custom types
from __future__ import annotations
import io
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Any, BinaryIO
import filetype
from filetype.types import Type as FiletypeBase
log = logging.getLogger(__name__)
# Number of bytes filetype needs to identify any type (its own constant is 262)
# NOTE(review): the 262-byte figure is taken from the comment above and is not
# verified here; some container formats may need a larger window — confirm
# against the installed filetype version before relying on it.
MAGIC_BYTES = 262
# ─────────────────────────────────────────────────────────────────────────────
# 1. Detection helpers
# ─────────────────────────────────────────────────────────────────────────────
def read_head(src: str | Path | bytes | BinaryIO, n: int = MAGIC_BYTES) -> bytes:
    """
    Return the first ``n`` bytes of a path, bytes-like object, or file object.

    Args:
        src: Filesystem path (``str`` or ``Path``), a ``bytes``/``bytearray``
            buffer, or any object exposing ``read()``.
        n: Maximum number of bytes to return (defaults to MAGIC_BYTES).

    Returns:
        Up to ``n`` bytes as an immutable ``bytes`` object for paths and
        buffers; for file objects, whatever ``src.read(n)`` returns.

    Raises:
        TypeError: If ``src`` is none of the supported kinds.

    Note:
        File objects are read from their current position and afterwards
        rewound to offset 0 so the caller can re-read them. Streams that
        report themselves as non-seekable are left where they are instead of
        raising.
    """
    if isinstance(src, (str, Path)):
        with open(src, "rb") as f:
            return f.read(n)
    if isinstance(src, (bytes, bytearray)):
        return bytes(src[:n])
    if hasattr(src, "read"):
        head = src.read(n)
        if hasattr(src, "seek"):
            # io objects always expose seek(), even pipes and sockets; only
            # rewind when the stream says it can. Duck-typed objects without
            # seekable() keep the previous unconditional rewind.
            seekable = getattr(src, "seekable", None)
            if seekable is None or seekable():
                src.seek(0)
        return head
    raise TypeError(f"Expected path, bytes, or file object; got {type(src)}")
@dataclass
class FileTypeResult:
    """Outcome of a detection: MIME type, extension, and coarse category."""

    mime: str | None
    extension: str | None
    # One of "image" | "video" | "audio" | "archive" | "document", or None
    category: str | None

    @property
    def known(self) -> bool:
        """Report whether a MIME type is present (i.e. ``mime`` is not None)."""
        return not (self.mime is None)

    def __str__(self) -> str:
        """Render as the MIME string, or the generic binary type when falsy."""
        if self.mime:
            return self.mime
        return "application/octet-stream"
def detect(src: str | Path | bytes | BinaryIO) -> FileTypeResult:
    """
    Identify the type of a path, bytes buffer, or file-like object.

    Only the leading bytes returned by read_head() are inspected. When
    filetype.guess() recognises nothing, every field of the result is None.
    Otherwise the category is the first of image, video, audio, archive,
    document whose ``filetype.is_*`` check accepts the same bytes (None if
    none does).

    Example:
        result = detect("upload.bin")
        print(result.mime)       # "image/png"
        print(result.extension)  # "png"
        print(result.category)   # "image"
    """
    leading = read_head(src)
    guessed = filetype.guess(leading)
    if guessed is None:
        return FileTypeResult(mime=None, extension=None, category=None)

    # Probe order is significant: the first category that matches wins.
    bucket: str | None = None
    for label in ("image", "video", "audio", "archive", "document"):
        if getattr(filetype, f"is_{label}")(leading):
            bucket = label
            break

    return FileTypeResult(
        mime=guessed.mime,
        extension=guessed.extension,
        category=bucket,
    )
def mime_of(src: str | Path | bytes | BinaryIO) -> str:
    """
    Give the detected MIME type, or 'application/octet-stream' when
    detection yields nothing.

    Example:
        mime = mime_of("video.mp4")        # "video/mp4"
        mime = mime_of(b"\\x89PNG\\r\\n")  # "image/png"
    """
    detected_mime = detect(src).mime
    if detected_mime:
        return detected_mime
    return "application/octet-stream"
def ext_of(src: str | Path | bytes | BinaryIO) -> str | None:
    """
    Give the detected extension (no leading dot), or None when the type is
    not recognised.

    Example:
        ext_of(b"\\xff\\xd8\\xff")  # "jpg"
        ext_of(b"\\x00" * 10)       # None
    """
    outcome = detect(src)
    return outcome.extension
# ─────────────────────────────────────────────────────────────────────────────
# 2. Validation
# ─────────────────────────────────────────────────────────────────────────────
# MIME allowlists intended for use as ``allowed_mimes`` in validate_upload().
ALLOWED_IMAGES = {
    "image/jpeg",
    "image/png",
    "image/gif",
    "image/webp",
    "image/avif",
}
ALLOWED_VIDEOS = {
    "video/mp4",
    "video/webm",
    "video/quicktime",
    "video/x-matroska",
}
ALLOWED_AUDIO = {
    "audio/mpeg",
    "audio/ogg",
    "audio/flac",
    "audio/wav",
    "audio/aac",
}
ALLOWED_DOCUMENTS = {
    "application/pdf",
    "application/zip",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
}
class FileTypeError(ValueError):
    """Signals that a file did not pass type validation."""
def validate_upload(
    src: str | Path | bytes | BinaryIO,
    allowed_mimes: set[str] | None = None,
    max_size_bytes: int | None = None,
    filename: str | None = None,
) -> FileTypeResult:
    """
    Validate an uploaded file's actual type against an allowlist.
    Ignores the filename extension — inspects magic bytes only.

    Args:
        src: Path, bytes/bytearray buffer, or readable file object.
        allowed_mimes: MIME types to accept. None or an empty set accepts any
            recognised type.
        max_size_bytes: Upper bound on the payload size. None or 0 disables
            the size check.
        filename: Used only to make error messages more specific.

    Returns:
        The FileTypeResult produced by detect() for the payload.

    Raises:
        FileTypeError: If the payload exceeds ``max_size_bytes``, its type
            cannot be identified, or its MIME type is not in ``allowed_mimes``.

    Example:
        # Accept only images
        result = validate_upload(request.body, allowed_mimes=ALLOWED_IMAGES)
        # Returns FileTypeResult with detected mime and extension
        # Accept PDF or DOCX, max 10 MB
        result = validate_upload(doc_bytes, allowed_mimes={"application/pdf", "application/..."}, max_size_bytes=10_485_760)
    """
    payload: str | Path | bytes | bytearray
    if isinstance(src, (str, Path)):
        # Size the file with stat() so an oversized upload is rejected without
        # being loaded into memory; detect() reads only the leading bytes.
        size = Path(src).stat().st_size
        payload = src
    elif isinstance(src, (bytes, bytearray)):
        size = len(src)
        payload = src
    else:
        data = src.read()  # type: ignore[union-attr]
        if hasattr(src, "seek"):
            # Rewind for the caller, but never fail on non-seekable streams
            # (pipes and sockets expose seek() yet cannot honour it).
            seekable = getattr(src, "seekable", None)
            if seekable is None or seekable():
                src.seek(0)
        size = len(data)
        payload = data

    if max_size_bytes and size > max_size_bytes:
        raise FileTypeError(
            f"File too large: {size:,} bytes > {max_size_bytes:,} limit"
        )

    # Optional " for <filename>" fragment shared by both error messages.
    suffix = f" for {filename}" if filename else ""

    result = detect(payload)
    if not result.known:
        raise FileTypeError(
            f"Unknown file type{suffix} — cannot be identified"
        )
    if allowed_mimes and result.mime not in allowed_mimes:
        raise FileTypeError(
            f"File type {result.mime!r} is not allowed"
            f"{suffix}. "
            f"Allowed: {', '.join(sorted(allowed_mimes))}"
        )
    return result
def is_safe_image(src: str | Path | bytes | BinaryIO) -> bool:
    """
    Tell whether the file is an image whose MIME type is in ALLOWED_IMAGES.

    Example:
        if not is_safe_image(uploaded_bytes):
            return {"error": "not a valid image"}, 400
    """
    header = read_head(src)
    if filetype.is_image(header):
        # An image by magic bytes; accept only allowlisted MIME types.
        return filetype.guess_mime(header) in ALLOWED_IMAGES
    return False
def reject_executables(src: str | Path | bytes | BinaryIO) -> None:
    """
    Raise FileTypeError when the detected MIME type is one of the executable
    types listed below; return None otherwise (including for unknown types).

    Example:
        reject_executables(user_upload)  # raises if an executable MIME is detected
    """
    blocked_mimes = {
        "application/x-executable",
        "application/x-mach-binary",
        "application/x-msdownload",
        "application/x-dosexec",
        "application/x-sharedlib",
    }
    detected_mime = detect(src).mime
    if detected_mime not in blocked_mimes:
        return
    raise FileTypeError(
        f"Executable file type {detected_mime!r} is not permitted"
    )
# ─────────────────────────────────────────────────────────────────────────────
# 3. Content-type routing
# ─────────────────────────────────────────────────────────────────────────────
def route_by_type(
    src: str | Path | bytes | BinaryIO,
    handlers: dict[str, Any],
    default: Any = None,
) -> Any:
    """
    Dispatch a file to the handler registered for its detected type.

    ``handlers`` may be keyed by full MIME type or by category shortcut
    ("image", "video", "audio", "archive", "document"). An exact MIME key
    takes precedence over a category key. If neither matches, ``default`` is
    called when given; otherwise FileTypeError is raised. The chosen callable
    receives ``src`` and its return value is passed through.

    Example:
        result = route_by_type(upload, {
            "image": process_image,
            "video": process_video,
            "application/pdf": process_pdf,
        })
    """
    detected = detect(src)

    # Candidate keys in priority order: exact MIME first, then category.
    for key in (detected.mime, detected.category):
        if key and key in handlers:
            return handlers[key](src)

    if default is None:
        raise FileTypeError(
            f"No handler for type {detected.mime!r} (category: {detected.category!r})"
        )
    return default(src)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Bulk scanning
# ─────────────────────────────────────────────────────────────────────────────
def scan_directory(
    directory: str | Path,
    pattern: str = "**/*",
    skip_errors: bool = True,
) -> list[dict]:
    """
    Detect the type of every regular file matching ``pattern`` under
    ``directory``.

    Each successful entry has the keys "path", "size", "mime", "extension"
    and "category". When a file fails and ``skip_errors`` is true, a warning
    is logged and the entry has only "path" and "error"; when it is false the
    exception propagates.

    Example:
        report = scan_directory("uploads/")
        by_mime = {}
        for f in report:
            by_mime.setdefault(f["mime"], []).append(f["path"])
    """
    report: list[dict] = []
    for entry in Path(directory).glob(pattern):
        if not entry.is_file():
            continue
        try:
            detected = detect(entry)
            record = {
                "path": str(entry),
                "size": entry.stat().st_size,
                "mime": detected.mime,
                "extension": detected.extension,
                "category": detected.category,
            }
        except Exception as exc:
            if not skip_errors:
                raise
            log.warning("Could not detect type for %s: %s", entry, exc)
            record = {"path": str(entry), "error": str(exc)}
        report.append(record)
    return report
def find_mismatched_extensions(directory: str | Path) -> list[dict]:
    """
    Find files whose extension doesn't match their detected type.
    Useful for finding mislabeled or renamed files.

    Files that failed to scan, files whose type was not recognised, and files
    without an extension are skipped. Equivalent spellings of the same format
    (e.g. "jpeg" and "jpg") are not reported as mismatches.

    Args:
        directory: Root directory handed to scan_directory().

    Returns:
        One dict per mismatch with the keys "path", "declared_ext",
        "detected_ext" and "mime".

    Example:
        mismatches = find_mismatched_extensions("uploads/")
        for m in mismatches:
            print(f"{m['path']}: declared .{m['declared_ext']} but is .{m['detected_ext']}")
    """
    # Alternate spellings mapped to one canonical form; applied to both sides
    # so the comparison is independent of which spelling detection reports.
    aliases = {"jpeg": "jpg", "tiff": "tif", "mpeg": "mpg", "midi": "mid"}

    mismatches = []
    for item in scan_directory(directory):
        # scan_directory() stores the detected extension under "extension";
        # error entries carry no such key at all.
        detected = item.get("extension")
        if "error" in item or not detected:
            continue
        path = Path(item["path"])
        declared = path.suffix.lstrip(".").lower()
        if not declared:
            continue
        if aliases.get(declared, declared) == aliases.get(detected, detected):
            continue
        mismatches.append({
            "path": str(path),
            "declared_ext": declared,
            "detected_ext": detected,
            "mime": item["mime"],
        })
    return mismatches
# ─────────────────────────────────────────────────────────────────────────────
# 5. Custom type matcher
# ─────────────────────────────────────────────────────────────────────────────
class SQLiteType(FiletypeBase):
    """
    Detect SQLite database files by their magic header.

    Example:
        filetype.add_type(SQLiteType())
        kind = filetype.guess(sqlite_bytes)
        kind.mime  # "application/x-sqlite3"
    """

    MIME = "application/x-sqlite3"
    EXTENSION = "sqlite"
    # SQLite magic: the 16-byte header string "SQLite format 3\x00"
    _MAGIC = b"SQLite format 3\x00"

    def __init__(self) -> None:
        # The filetype base class takes mime/extension as constructor
        # arguments; without this call SQLiteType() cannot be instantiated.
        super().__init__(mime=self.MIME, extension=self.EXTENSION)

    def match(self, buf: bytes) -> bool:
        """Return True when ``buf`` starts with the SQLite header string."""
        # Slicing a short buffer yields fewer bytes, so no length check is needed.
        return buf[: len(self._MAGIC)] == self._MAGIC
class AvroType(FiletypeBase):
    """Detect Apache Avro object container files."""

    MIME = "application/x-avro"
    EXTENSION = "avro"
    # Avro magic: the four bytes "Obj\x01"
    _MAGIC = b"Obj\x01"

    def __init__(self) -> None:
        # The filetype base class takes mime/extension as constructor
        # arguments; without this call AvroType() cannot be instantiated.
        super().__init__(mime=self.MIME, extension=self.EXTENSION)

    def match(self, buf: bytes) -> bool:
        """Return True when ``buf`` starts with the Avro container magic."""
        # Slicing a short buffer yields fewer bytes, so no length check is needed.
        return buf[: len(self._MAGIC)] == self._MAGIC
def register_custom_types() -> None:
    """
    Add one instance of each custom matcher (SQLite, then Avro) to filetype.

    Example:
        register_custom_types()
        kind = filetype.guess(sqlite_file_bytes)
        kind.mime  # "application/x-sqlite3"
    """
    for matcher_cls in (SQLiteType, AvroType):
        filetype.add_type(matcher_cls())
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo: run the helpers above against synthetic magic-byte buffers.

    # Register custom types
    register_custom_types()

    # Synthetic magic-byte samples: a real signature padded with zero bytes
    samples = [
        ("PNG", b"\x89PNG\r\n\x1a\n" + b"\x00" * 254),
        ("JPEG", b"\xff\xd8\xff\xe0" + b"\x00" * 258),
        ("MP4", b"\x00\x00\x00\x18ftypisom" + b"\x00" * 250),
        ("ZIP", b"PK\x03\x04" + b"\x00" * 258),
        ("PDF", b"%PDF-1.7\n" + b"\x00" * 253),
        ("SQLite", b"SQLite format 3\x00" + b"\x00" * 246),
        ("Unknown", b"\xDE\xAD\xBE\xEF" + b"\x00" * 258),
    ]

    print("=== filetype detection ===")
    for label, buf in samples:
        result = detect(buf)
        print(f" {label:8s}: mime={str(result):35s} ext={result.extension or '-':6s} cat={result.category or '-'}")

    print("\n=== validation ===")
    png_buf = b"\x89PNG\r\n\x1a\n" + b"\x00" * 254
    try:
        r = validate_upload(png_buf, allowed_mimes=ALLOWED_IMAGES)
        print(f" PNG upload: ACCEPTED ({r.mime})")
    except FileTypeError as e:
        print(f" PNG upload: REJECTED — {e}")

    # A ZIP offered where only images are allowed should be rejected.
    zip_buf = b"PK\x03\x04" + b"\x00" * 258
    try:
        r = validate_upload(zip_buf, allowed_mimes=ALLOWED_IMAGES)
        print(f" ZIP as image: ACCEPTED ({r.mime})")
    except FileTypeError as e:
        print(f" ZIP as image: REJECTED — {e}")

    print("\n=== safe image check ===")
    print(f" is_safe_image(PNG): {is_safe_image(png_buf)}")
    print(f" is_safe_image(ZIP): {is_safe_image(zip_buf)}")

    print("\n=== MIME routing ===")

    def _handle_image(src):
        return f"processed image ({mime_of(src)})"

    def _handle_archive(src):
        return f"extracted archive ({mime_of(src)})"

    for label, buf in [("PNG", png_buf), ("ZIP", zip_buf)]:
        out = route_by_type(buf, {"image": _handle_image, "archive": _handle_archive},
                            default=lambda s: f"unknown: {mime_of(s)}")
        print(f" {label}: {out}")
For the python-magic alternative — python-magic is a Python binding for the libmagic C library (the same engine behind the Unix file command), supports 700+ file types, and handles complex multi-part signatures; filetype is a pure-Python implementation with no C dependencies, supports ~80 common binary formats, and runs anywhere Python does — use python-magic when you need maximum format coverage or want to match the file command’s behavior, filetype when you need zero-dependency detection of common media/archive/document types in serverless environments, Docker images, or CI without system library installation. For the mimetypes stdlib alternative — Python’s mimetypes module maps file extensions to MIME types using a lookup table; it never reads file contents and is trivially bypassed by renaming files — filetype reads the actual magic bytes (first 262 bytes of content) to detect the real format regardless of filename — use mimetypes only when you trust the file extension (e.g., serving your own static assets), filetype when validating untrusted uploads where content-spoofing is a security concern. The Claude Skills 360 bundle includes filetype skill sets covering detect()/mime_of()/ext_of() detection helpers, FileTypeResult dataclass, validate_upload() with MIME allowlist and size limit, is_safe_image()/reject_executables() security helpers, route_by_type() content routing, scan_directory()/find_mismatched_extensions() bulk scanning, and SQLiteType/AvroType custom matcher examples with register_custom_types(). Start with the free tier to try file type detection and upload validation code generation.