Python’s sndhdr module identifies audio file formats by reading magic bytes and returns a SndHeaders namedtuple with format metadata. Import it with `import sndhdr`. what: sndhdr.what("audio.wav") → SndHeaders(filetype, framerate, nchannels, nframes, sampwidth), or None if the format is not recognized. whathdr: sndhdr.whathdr(filename) — the header-based check that what() delegates to; it returns the same result. Detected formats: "wav", "aiff", "aifc", "au" (Sun), "hcom", "sndr", "sndt", "voc" (Creative), "8svx" (Amiga), "sb" (signed byte), "ub" (unsigned byte), "ul" (µ-law). SndHeaders fields: filetype (string); framerate (int sample rate, 0 if unknown from the header); nchannels (int, 0 if unknown); nframes (int, -1 if unknown); sampwidth (sample size in bits, or 'A' for A-law / 'U' for µ-law). The pipeline below exposes its own SndHeaders(filetype, sample_rate, channels, encoding_type, sample_bits) tuple on top of this. sndhdr.tests — a list of test functions called as test(h, f), where h is the header bytes and f is the open file; each returns a result tuple or None, and you can append custom functions to extend detection. Deprecated in Python 3.11, removed in Python 3.13 — use filetype (PyPI) or mutagen for new code. Claude Code generates audio upload validators, format migration pipelines, audio catalog scanners, and type-aware processing routers.
CLAUDE.md for sndhdr
## sndhdr Stack
- Stdlib: import sndhdr
- File: result = sndhdr.what("audio.wav") # SndHeaders or None
- if result: print(result.filetype, result.framerate)
- Header: sndhdr.whathdr("audio.au") # same result as what(); reads the file from disk
- Fields: .filetype .framerate .nchannels .nframes .sampwidth
- Note: Deprecated 3.11, removed 3.13 — use filetype for new code
sndhdr Audio Detection Pipeline
# app/sndhdrutil.py — detect, validate, scan, route, catalog
from __future__ import annotations
import io
import struct
from dataclasses import dataclass
from pathlib import Path
from typing import NamedTuple, Any
# sndhdr removed in Python 3.13; graceful fallback
try:
    # sndhdr is deprecated since Python 3.11 and absent from 3.13 onwards,
    # so the import itself is the availability probe.
    import sndhdr as _sndhdr
    _SNDHDR_AVAILABLE = True
except ImportError:
    # No stdlib detector: callers must rely on the magic-byte fallback.
    _sndhdr = None # type: ignore[assignment]
    _SNDHDR_AVAILABLE = False
# ─────────────────────────────────────────────────────────────────────────────
# 0. Magic-byte fallback for Python 3.13+
# ─────────────────────────────────────────────────────────────────────────────
class SndHeaders(NamedTuple):
    """Audio header summary produced by the detectors in this module.

    Numeric fields are 0 and ``encoding_type`` is None when the value was not
    decoded from the header.
    """

    filetype: str
    sample_rate: int
    channels: int
    encoding_type: str | None
    sample_bits: int


# Each entry is (magic bytes, byte offset of the magic, format label).
# "_form" is a placeholder: an IFF "FORM" container is only resolved to a
# concrete format by the form-type tag stored at bytes 8-12.
_AUDIO_MAGIC: list[tuple[bytes, int, str]] = [
    # WAV: RIFF....WAVEfmt
    (b"RIFF", 0, "wav"),
    # AIFF / AIFC
    (b"FORM", 0, "_form"),  # resolved by bytes 8-12
    # AU (Sun)
    (b".snd", 0, "au"),
    # Creative VOC
    (b"Creative Voice File\x1a", 0, "voc"),
    # 8SVX (Amiga IFF): the tag is the FORM type, so it lives at offset 8
    (b"8SVX", 8, "8svx"),
    # FLAC
    (b"fLaC", 0, "flac"),
    # OGG
    (b"OggS", 0, "ogg"),
    # MP3: ID3 tag (raw frames are matched by sync word, not by this table)
    (b"ID3", 0, "mp3"),
]


def _detect_audio_from_bytes(header: bytes) -> SndHeaders | None:
    """Identify an audio format from the leading bytes of a file.

    Args:
        header: The first bytes of the data. At least 4 bytes are required;
            36 bytes are needed to decode WAV format parameters.

    Returns:
        A SndHeaders for a recognised signature, otherwise None. Sample rate,
        channel count and bit depth are decoded only for WAV data whose
        ``fmt `` chunk directly follows the RIFF header; 8SVX and VOC report
        fixed values and every other format reports zeros.
    """
    if len(header) < 4:
        return None

    # WAV: RIFF container whose form type is WAVE.
    if header.startswith(b"RIFF") and header[8:12] == b"WAVE":
        sr = ch = bits = 0
        # The length guard makes both unpack calls safe, so no struct.error
        # handling is needed. Layouts with other chunks before "fmt " keep
        # the zero defaults.
        if len(header) >= 36 and header[12:16] == b"fmt ":
            ch, sr = struct.unpack_from("<HI", header, 22)
            bits = struct.unpack_from("<H", header, 34)[0]
        return SndHeaders("wav", sr, ch, "PCM", bits)

    # IFF "FORM" container: AIFF, AIFC and Amiga 8SVX share the outer magic
    # and differ only in the form-type tag.
    if header.startswith(b"FORM"):
        form_type = header[8:12]
        if form_type == b"AIFF":
            return SndHeaders("aiff", 0, 0, None, 0)
        if form_type == b"AIFC":
            return SndHeaders("aifc", 0, 0, None, 0)
        if form_type == b"8SVX":
            # Same channel/bit values the stdlib sndhdr test reports.
            return SndHeaders("8svx", 0, 1, None, 8)

    # AU (Sun audio)
    if header.startswith(b".snd"):
        return SndHeaders("au", 0, 0, None, 0)

    # Creative VOC
    if header.startswith(b"Creative Voice File\x1a"):
        return SndHeaders("voc", 0, 0, None, 8)

    # FLAC
    if header.startswith(b"fLaC"):
        return SndHeaders("flac", 0, 0, None, 0)

    # OGG
    if header.startswith(b"OggS"):
        return SndHeaders("ogg", 0, 0, None, 0)

    # MP3 with ID3v2 tag
    if header.startswith(b"ID3"):
        return SndHeaders("mp3", 0, 0, None, 0)

    # Raw MPEG audio frame. The 11-bit sync alone also matches a UTF-16 LE
    # byte-order mark (FF FE) and FF FF padding, so the remaining header
    # fields are validated as well.
    if header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
        version_id = (header[1] >> 3) & 0x03   # 0b01 is reserved
        layer_id = (header[1] >> 1) & 0x03     # 0b01 means Layer III
        bitrate_idx = header[2] >> 4           # 0b1111 is invalid
        rate_idx = (header[2] >> 2) & 0x03     # 0b11 is reserved
        if (version_id != 0x01 and layer_id == 0x01
                and bitrate_idx != 0x0F and rate_idx != 0x03):
            return SndHeaders("mp3", 0, 0, None, 0)

    return None
def detect_audio(source: str | Path | bytes) -> SndHeaders | None:
    """
    Detect audio format from a file path, raw bytes, or a binary file object.

    When the stdlib sndhdr module is importable it is tried first for file
    paths; anything it does not recognise, and every non-path source, is
    handled by magic-byte detection on the first 64 bytes.

    Args:
        source: A path (str or Path), a bytes-like header/buffer, or an object
            with a ``read`` method. A seekable file object is restored to the
            position it had before the call.

    Returns:
        SndHeaders(filetype, sample_rate, channels, encoding_type, sample_bits)
        or None if not a recognized audio format.

    Raises:
        OSError: If a path is given and the file cannot be opened or read.

    Example:
        info = detect_audio("recording.wav")
        if info:
            print(info.filetype, info.sample_rate, "Hz")
    """
    is_path = isinstance(source, (str, Path))

    # sndhdr.what()/whathdr() open the file by name themselves, so only path
    # sources may be handed to them.
    if _SNDHDR_AVAILABLE and is_path:
        result = _sndhdr.what(str(source))
        if result is not None:
            # The stdlib tuple is (filetype, framerate, nchannels, nframes,
            # sampwidth): its 4th item is a frame count, not an encoding, and
            # sampwidth may be a letter code instead of a bit count.
            filetype, framerate, nchannels, _nframes, sampwidth = result
            if isinstance(sampwidth, int):
                return SndHeaders(filetype, framerate, nchannels, None, sampwidth)
            encoding = {"U": "u-law", "A": "A-law"}.get(sampwidth)
            return SndHeaders(filetype, framerate, nchannels, encoding, 0)

    # Fallback: read header bytes
    if is_path:
        with open(str(source), "rb") as f:
            header = f.read(64)
    elif isinstance(source, (bytes, bytearray, memoryview)):
        header = bytes(source[:64])
    else:
        pos = source.tell() if hasattr(source, "tell") else 0
        header = source.read(64)
        if hasattr(source, "seek"):
            source.seek(pos)
    return _detect_audio_from_bytes(header)
# ─────────────────────────────────────────────────────────────────────────────
# 1. Format metadata
# ─────────────────────────────────────────────────────────────────────────────
# Maps a detected filetype label to the MIME type returned by
# get_mime_type(). A label with no entry here makes get_mime_type() return
# None. "sb" and "ub" intentionally share one MIME type.
_FORMAT_TO_MIME: dict[str, str] = {
    "wav": "audio/wav",
    "aiff": "audio/aiff",
    "aifc": "audio/x-aiff",
    "au": "audio/basic",
    "voc": "audio/x-voc",
    "8svx": "audio/x-8svx",
    "hcom": "audio/x-hcom",
    "sndr": "audio/x-sndr",
    "sndt": "audio/x-sndt",
    "sb": "audio/x-raw",
    "ub": "audio/x-raw",
    "ul": "audio/x-mulaw",
    "flac": "audio/flac",
    "ogg": "audio/ogg",
    "mp3": "audio/mpeg",
}
# Maps a filetype label to the lower-case, dot-prefixed extensions accepted
# for it. extension_matches_content() searches this map to find the format an
# extension declares; a format without an entry can never be "declared".
_FORMAT_TO_EXT: dict[str, list[str]] = {
    "wav": [".wav"],
    "aiff": [".aif", ".aiff"],
    "aifc": [".aifc"],
    "au": [".au", ".snd"],
    "voc": [".voc"],
    "flac": [".flac"],
    "ogg": [".ogg", ".oga"],
    "mp3": [".mp3"],
}
# Formats that stdlib wave module can read
# (not referenced by any function in the code shown in this section)
_WAVE_READABLE = {"wav"}
# Formats with lossless PCM; membership is what is_lossless() reports as True.
# This is a set, so test it with `in`.
_LOSSLESS = {"wav", "aiff", "aifc", "au", "flac"}
def is_audio(source: str | Path | bytes) -> bool:
    """Tell whether detect_audio() recognises *source* as audio."""
    info = detect_audio(source)
    return info is not None
def get_mime_type(source: str | Path | bytes) -> str | None:
    """Look up the MIME type of *source*.

    Returns None when the format is not recognised or has no MIME mapping.
    """
    info = detect_audio(source)
    if not info:
        return None
    return _FORMAT_TO_MIME.get(info.filetype)
def is_lossless(source: str | Path | bytes) -> bool | None:
    """Classify *source* as lossless (True) or lossy (False).

    Returns None when the format cannot be detected at all.
    """
    info = detect_audio(source)
    return None if info is None else info.filetype in _LOSSLESS
def extension_matches_content(path: str | Path) -> tuple[bool, str | None, str | None]:
    """
    Compare the format implied by a file's extension with its detected content.

    Returns a (matches, detected_type, declared_type_by_extension) triple.
    ``matches`` is False whenever either side is unknown.

    Example:
        ok, detected, declared = extension_matches_content("audio.mp3")
    """
    file_path = Path(path)
    suffix = file_path.suffix.lower()

    # First format whose extension list contains the suffix, if any.
    declared: str | None = next(
        (fmt for fmt, exts in _FORMAT_TO_EXT.items() if suffix in exts),
        None,
    )

    info = detect_audio(file_path)
    detected = info.filetype if info else None

    both_known = detected is not None and declared is not None
    return (both_known and detected == declared), detected, declared
# ─────────────────────────────────────────────────────────────────────────────
# 2. Batch scanner
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class AudioFile:
    """Scan result for a single file, printable as one aligned report row."""

    path: Path                  # location of the file, as stored by the scanner
    filetype: str | None        # detected format label, None if unrecognised
    sample_rate: int            # Hz, 0 if unknown
    channels: int               # channel count, 0 if unknown
    bits: int                   # bits per sample, 0 if unknown
    size: int                   # file size in bytes
    ext_match: bool             # extension agrees with detected content
    is_lossless: bool | None    # None if the format is unknown

    def __str__(self) -> str:
        """Render the columns: flag, type, rate, channels, depth, size, path."""
        flag = "✓" if self.ext_match else "✗"
        kind = self.filetype or "unknown"
        rate = f"{self.sample_rate} Hz" if self.sample_rate else "?"

        if not self.channels:
            layout = "?"
        elif self.channels == 1:
            layout = "mono"
        elif self.channels == 2:
            layout = "stereo"
        else:
            layout = f"{self.channels}ch"

        depth = f"{self.bits}-bit" if self.bits else "?"

        columns = [
            flag,
            f"{kind:5s}",
            f"{rate:9s}",
            f"{layout:8s}",
            f"{depth:7s}",
            f"{self.size:8,d}B",
            f"{self.path}",
        ]
        return " ".join(columns)
def scan_directory(
    directory: str | Path,
    extensions: list[str] | None = None,
    recursive: bool = True,
) -> list[AudioFile]:
    """
    Scan a directory for audio files and classify each one.

    Args:
        directory: Root directory to scan.
        extensions: Optional extension filter. Matching is case-insensitive
            and a missing leading dot is tolerated (".WAV" and "wav" both
            match "x.wav"). When given, every file with a listed extension is
            reported even if its content is not recognised; when omitted or
            empty, only files detected as audio are reported.
        recursive: Descend into sub-directories when True.

    Returns:
        AudioFile records sorted by path, with paths relative to *directory*.
        Files that cannot be read are skipped.

    Example:
        for af in scan_directory("media/", extensions=[".wav", ".mp3"]):
            print(af)
    """
    root = Path(directory)
    pattern = "**/*" if recursive else "*"

    # Normalise the filter once so the per-file test is a plain set lookup.
    wanted: set[str] | None = None
    if extensions:
        wanted = {
            ext.lower() if ext.startswith(".") or not ext else f".{ext.lower()}"
            for ext in extensions
        }

    results: list[AudioFile] = []
    for p in sorted(root.glob(pattern)):
        if not p.is_file():
            continue
        if wanted is not None and p.suffix.lower() not in wanted:
            continue
        try:
            info = detect_audio(p)
            if info is None and wanted is None:
                continue
            # The helper already reports False when either side is unknown.
            ext_ok, _, _ = extension_matches_content(p)
            size = p.stat().st_size
        except OSError:
            # Unreadable or vanished file: best-effort scan, skip it.
            continue
        results.append(AudioFile(
            path=p.relative_to(root),
            filetype=info.filetype if info else None,
            sample_rate=info.sample_rate if info else 0,
            channels=info.channels if info else 0,
            bits=info.sample_bits if info else 0,
            size=size,
            ext_match=ext_ok,
            # _LOSSLESS is a set, so membership is the correct test.
            is_lossless=(info.filetype in _LOSSLESS) if info else None,
        ))
    return results
def format_summary(files: list[AudioFile]) -> dict[str, int]:
    """Tally files per filetype; files without a type count as "unknown"."""
    tally: dict[str, int] = {}
    for entry in files:
        label = entry.filetype if entry.filetype else "unknown"
        tally.setdefault(label, 0)
        tally[label] += 1
    return tally
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-contained demo: synthesises tiny headers, runs each helper on them,
    # then scans a temporary directory populated with sample files.
    import array
    import tempfile
    import os

    print("=== sndhdr demo ===")
    print(f" sndhdr available: {_SNDHDR_AVAILABLE}")

    # ── build minimal test audio headers ──────────────────────────────────────
    def _minimal_wav(sr: int = 44100, ch: int = 1, bits: int = 16) -> bytes:
        """Build a 44-byte PCM WAV header that declares an empty data chunk."""
        data_size = 0
        block_align = ch * bits // 8
        byte_rate = sr * ch * bits // 8
        riff_part = struct.pack("<4sI4s", b"RIFF", 36 + data_size, b"WAVE")
        fmt_part = struct.pack("<4sIHHIIHH",
                               b"fmt ", 16, 1, ch, sr, byte_rate, block_align, bits)
        data_part = struct.pack("<4sI", b"data", data_size)
        return riff_part + fmt_part + data_part

    zeros_28 = b"\x00" * 28
    test_headers: dict[str, bytes] = {
        "wav_44100_stereo_16": _minimal_wav(44100, 2, 16),
        "wav_22050_mono_8": _minimal_wav(22050, 1, 8),
        "aiff": b"FORM\x00\x00\x00\x00AIFF" + b"\x00" * 20,
        "aifc": b"FORM\x00\x00\x00\x00AIFC" + b"\x00" * 20,
        "au": b".snd" + zeros_28,
        "voc": b"Creative Voice File\x1a" + b"\x00" * 40,
        "flac": b"fLaC" + zeros_28,
        "ogg": b"OggS" + zeros_28,
        "mp3_id3": b"ID3" + b"\x03\x00\x00" + b"\x00" * 26,
        "mp3_sync": b"\xff\xfb\x90\x00" + zeros_28,
    }

    # ── detect_audio from bytes ────────────────────────────────────────────────
    print("\n--- detect_audio from bytes ---")
    for name, header in test_headers.items():
        info = detect_audio(header)
        if not info:
            print(f" {name:25s}: None")
            continue
        # Only show the fields that were actually decoded (non-zero).
        shown = []
        if info.sample_rate:
            shown.append(f"sr={info.sample_rate}")
        if info.channels:
            shown.append(f"ch={info.channels}")
        if info.sample_bits:
            shown.append(f"bits={info.sample_bits}")
        extra = " ".join(shown)
        print(f" {name:25s}: {info.filetype!r:6s} {extra}")

    # ── get_mime_type ──────────────────────────────────────────────────────────
    print("\n--- get_mime_type ---")
    for name in list(test_headers)[:5]:
        header = test_headers[name]
        print(f" {name:25s}: {get_mime_type(header)!r}")

    # ── is_lossless ────────────────────────────────────────────────────────────
    print("\n--- is_lossless ---")
    for name, header in test_headers.items():
        print(f" {name:25s}: {is_lossless(header)}")

    # ── scan_directory with temp files ─────────────────────────────────────────
    print("\n--- scan_directory ---")
    with tempfile.TemporaryDirectory() as tmpdir:
        sample_files = {
            "stereo.wav": _minimal_wav(44100, 2, 16),
            "mono.wav": _minimal_wav(22050, 1, 8),
            "music.ogg": b"OggS" + zeros_28,
            "wrong_ext.mp3": _minimal_wav(44100, 1, 16),  # WAV with .mp3
            "readme.txt": b"Not audio",
        }
        for filename, payload in sample_files.items():
            (Path(tmpdir) / filename).write_bytes(payload)

        files = scan_directory(tmpdir, recursive=False)
        for af in files:
            print(f" {af}")
        summary = format_summary(files)
        print(f"\n format summary: {summary}")

    print("\n=== done ===")
For the mutagen alternative: mutagen (PyPI) reads audio metadata across MP3, FLAC, OGG, M4A, AAC, WMA, AIFF, and more, returning tag data (ID3, Vorbis comments, APEv2) along with audio stream info such as bitrate, sample rate, and duration. Use mutagen when you need audio metadata (title, artist, album, duration, bitrate) in addition to format detection, because sndhdr only inspects file headers and does not parse tag frames or codec parameters. For the filetype alternative: filetype (PyPI) detects audio, image, video, archive, and document formats from magic bytes using a unified API (filetype.audio(), filetype.guess()) with no native dependencies. Use filetype on Python 3.13+, where sndhdr has been removed, or in any new code where you want a maintained library covering modern formats (MP3, FLAC, OGG, AAC, M4A) that sndhdr does not detect. The Claude Skills 360 bundle includes sndhdr skill sets covering detect_audio() with sndhdr + magic-byte fallback (Python 3.13 safe), the SndHeaders namedtuple with is_audio()/get_mime_type()/is_lossless()/extension_matches_content() validators, the AudioFile dataclass with the scan_directory() batch scanner, and format_summary() statistics. Start with the free tier to try audio format detection patterns and sndhdr pipeline code generation.