Python’s marshal module serializes and deserializes Python values to/from bytes — primarily used for .pyc bytecode files. Import it with import marshal. Write: marshal.dump(obj, file) — writes serialized obj to an open binary file. Read: marshal.load(file) → object. In-memory: marshal.dumps(obj) → bytes; marshal.loads(data) → object. Version: marshal.version — current format version integer (4 as of Python 3.12); pass a version as the optional trailing argument — marshal.dumps(obj, version) or marshal.dump(obj, file, version) — to force a specific format version. Supported types: None, bool, int, float, complex, str, bytes, bytearray, tuple, list, set, frozenset, dict, types.CodeType (code objects), Ellipsis, StopIteration. Not supported: arbitrary class instances, functions, generators — use pickle for class instances and importable functions (pickle cannot serialize generators either). Code objects: compile(source, filename, mode) returns a CodeType; marshal can round-trip it. .pyc header: 4-byte magic (importlib.util.MAGIC_NUMBER), 4-byte flags, then either a 4-byte mtime plus a 4-byte source size (timestamp mode) or an 8-byte source hash (hash mode) — followed by the marshalled code object. Claude Code generates bytecode readers, .pyc validators, code object serializers, and lightweight in-process caches.
CLAUDE.md for marshal
## marshal Stack
- Stdlib: import marshal, types, struct, importlib.util
- Dumps: data = marshal.dumps(obj) # obj → bytes
- Loads: obj = marshal.loads(data) # bytes → obj
- File: with open(f, "wb") as fh: marshal.dump(obj, fh) # write to file
- with open(f, "rb") as fh: obj = marshal.load(fh) # read from file
- Version: marshal.version # 4 on CPython 3.12
- Types: None bool int float str bytes tuple list dict set frozenset CodeType
- PYC: magic(4) + flags(4) + mtime(4) + size(4) [hash mode: an 8-byte source hash replaces mtime + size] + marshal.load(f)
- Note: Not for arbitrary objects — use pickle for those
marshal Bytecode Serialization Pipeline
# app/marshalutil.py — dumps/loads, pyc reader, code inspector, cache
from __future__ import annotations
import dis
import importlib.util
import marshal
import struct
import types
from dataclasses import dataclass, field
from pathlib import Path
# ─────────────────────────────────────────────────────────────────────────────
# 1. Round-trip helpers
# ─────────────────────────────────────────────────────────────────────────────
def marshal_roundtrip(obj: object) -> object:
"""
Serialize obj to bytes with marshal and deserialize back.
Useful for testing marshal compatibility of a value.
Example:
assert marshal_roundtrip({"a": [1, 2, 3]}) == {"a": [1, 2, 3]}
code = compile("x = 1", "<test>", "exec")
assert marshal_roundtrip(code).co_filename == "<test>"
"""
return marshal.loads(marshal.dumps(obj))
def is_marshalable(obj: object) -> bool:
    """
    Report whether marshal.dumps accepts obj.

    Returns False when marshal.dumps raises ValueError or TypeError and
    True when it completes; any other exception propagates to the caller.

    Example:
        print(is_marshalable({"key": [1, 2]}))  # True
        print(is_marshalable(lambda: None))     # False
    """
    try:
        marshal.dumps(obj)
    except (ValueError, TypeError):
        return False
    else:
        return True
def marshal_size(obj: object) -> int:
    """
    Return how many bytes marshal.dumps emits for obj.

    The exact count depends on the marshal format in use, so treat the
    numbers as relative measurements. Exceptions from marshal.dumps
    (e.g. for an unsupported value) propagate unchanged.

    Example:
        print(marshal_size("hello"))      # a handful of bytes
        print(marshal_size([1] * 1000))   # grows with the list length
    """
    payload = marshal.dumps(obj)
    return len(payload)
# ─────────────────────────────────────────────────────────────────────────────
# 2. .pyc file reader
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class PycHeader:
    """
    Decoded 16-byte header of a CPython .pyc bytecode file.

    Attributes (as populated by read_pyc in this module):
        magic       — first four bytes of the file, kept as raw bytes
        flags       — 32-bit flag word; bit 0 set selects hash mode
        mtime       — source modification time (timestamp mode), else 0
        source_size — source size in bytes (timestamp mode), else 0
        hash_value  — 64-bit source hash (hash mode), else 0
        valid       — True when magic equals importlib.util.MAGIC_NUMBER
    """

    magic: bytes
    flags: int
    mtime: int
    source_size: int
    hash_value: int
    valid: bool

    def __str__(self) -> str:
        """Render every field on one line, ending with an OK/STALE marker."""
        if self.flags & 1:
            mode = "hash"
        else:
            mode = "timestamp"
        status = "STALE/WRONG-VERSION" if not self.valid else "OK"
        fields = [
            f"magic={self.magic.hex()}",
            f"flags={self.flags:#x}",
            f"mode={mode}",
            f"mtime={self.mtime}",
            f"size={self.source_size}",
            f"hash={self.hash_value:#x}",
            status,
        ]
        return "PycHeader(" + ", ".join(fields) + ")"
def read_pyc(path: "str | Path") -> "tuple[PycHeader, types.CodeType]":
    """
    Parse a .pyc file into its header and its unmarshalled payload.

    The first 16 bytes are decoded into a PycHeader; everything after them
    is passed to marshal.loads. The payload is unmarshalled even when the
    magic number does not match the running interpreter — inspect
    header.valid to find out whether it did.

    Raises:
        ValueError: the file holds fewer than 16 bytes.
        Any exception from reading the file or from marshal.loads propagates.

    NOTE(review): marshal.loads is applied to the file contents as-is, so
    this should only be pointed at bytecode files you trust.

    Example:
        import py_compile, tempfile
        from pathlib import Path
        with tempfile.TemporaryDirectory() as td:
            src = Path(td) / "x.py"
            src.write_text("answer = 42\n")
            pyc = py_compile.compile(str(src), doraise=True)
            hdr, code = read_pyc(pyc)
            print(hdr)
            print(code.co_filename)
    """
    raw = Path(path).read_bytes()
    if len(raw) < 16:
        raise ValueError(f"File too short to be a valid .pyc: {path}")

    magic = raw[:4]
    (flags,) = struct.unpack_from("<I", raw, 4)

    # Fields not used by the active invalidation mode stay at zero.
    mtime = source_size = hash_value = 0
    if flags & 1:
        # hash mode: offsets 8..15 hold one little-endian 64-bit hash
        (hash_value,) = struct.unpack_from("<Q", raw, 8)
    else:
        # timestamp mode: offsets 8..11 are the mtime, 12..15 the source size
        mtime, source_size = struct.unpack_from("<II", raw, 8)

    header = PycHeader(
        magic=magic,
        flags=flags,
        mtime=mtime,
        source_size=source_size,
        hash_value=hash_value,
        valid=(magic == importlib.util.MAGIC_NUMBER),
    )
    return header, marshal.loads(raw[16:])
def pyc_is_current(path: "str | Path") -> bool:
    """
    Tell whether a .pyc file carries the running interpreter's magic number.

    Delegates to read_pyc and returns the header's valid flag. Any
    exception raised while reading or decoding the file (missing file,
    truncated header, payload that fails to unmarshal, ...) is swallowed
    and reported as False.

    Example:
        if not pyc_is_current("__pycache__/app.cpython-312.pyc"):
            print("stale bytecode")
    """
    try:
        header = read_pyc(path)[0]
    except Exception:
        # Best-effort check: an unreadable file counts as "not current".
        return False
    return header.valid
# ─────────────────────────────────────────────────────────────────────────────
# 3. Code object inspection
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class CodeInfo:
    """
    Readable snapshot of selected attributes of a types.CodeType object.

    Each field mirrors the co_* attribute of the same name; nested holds one
    CodeInfo per code object found directly among the constants (for
    example the bodies of functions defined in a module).

    Example:
        code = compile("def f(x): return x + 1", "<demo>", "exec")
        info = CodeInfo.from_code(code)
        print(info)
    """

    filename: str
    name: str
    firstlineno: int
    argcount: int
    varnames: tuple
    freevars: tuple
    cellvars: tuple
    consts: tuple
    names: tuple
    flags: int
    nested: "list[CodeInfo]" = field(default_factory=list)

    @classmethod
    def from_code(cls, code: types.CodeType, _depth: int = 0) -> "CodeInfo":
        """
        Build a CodeInfo tree from code, recursing into nested code objects.

        _depth is only threaded through the recursive calls (incremented per
        level); it does not influence the result.
        """
        children = [
            cls.from_code(item, _depth + 1)
            for item in code.co_consts
            if isinstance(item, types.CodeType)
        ]
        return cls(
            filename=code.co_filename,
            name=code.co_name,
            firstlineno=code.co_firstlineno,
            argcount=code.co_argcount,
            varnames=code.co_varnames,
            freevars=code.co_freevars,
            cellvars=code.co_cellvars,
            consts=code.co_consts,
            names=code.co_names,
            flags=code.co_flags,
            nested=children,
        )

    def __str__(self, indent: int = 0) -> str:
        """Format this node, then each nested node one indent level deeper."""
        prefix = " " * indent
        own_lines = [
            f"{prefix}CodeInfo({self.name!r} @ {self.filename}:{self.firstlineno})",
            f"{prefix} args={self.argcount} flags={self.flags:#x}",
            f"{prefix} varnames={self.varnames}",
            f"{prefix} names={self.names}",
        ]
        child_blocks = [child.__str__(indent + 1) for child in self.nested]
        return "\n".join(own_lines + child_blocks)
def disassemble_source(source: str, filename: str = "<string>") -> str:
    """
    Compile source in "exec" mode and return the text that dis.dis prints.

    The listing is captured in an in-memory buffer instead of being written
    to stdout. Compilation errors (e.g. SyntaxError) propagate unchanged.

    Example:
        print(disassemble_source("x = 1 + 2"))
    """
    import io

    with io.StringIO() as listing:
        dis.dis(compile(source, filename, "exec"), file=listing)
        return listing.getvalue()
# ─────────────────────────────────────────────────────────────────────────────
# 4. Marshal-based in-process cache
# ─────────────────────────────────────────────────────────────────────────────
class MarshalCache:
    """
    Lightweight file-backed key/value cache serialized with marshal.

    The whole store is one dict, re-written to the backing file after every
    mutation. Values (and keys) must be marshal-compatible: None, bool, int,
    float, str, bytes, tuple, list, dict, set, frozenset.

    Unlike pickle, loading marshal data does not call user-defined
    __reduce__ hooks. The marshal documentation nevertheless warns that the
    format is not hardened against malformed or malicious input, so point
    the cache only at files your own process wrote.

    A missing, unreadable, corrupt, or non-dict cache file is treated as an
    empty cache rather than an error.

    Example:
        cache = MarshalCache("/tmp/myapp.cache")
        cache.set("primes", [2, 3, 5, 7, 11])
        print(cache.get("primes"))               # [2, 3, 5, 7, 11]
        print(cache.get("missing", default=[]))  # []
    """

    def __init__(self, path: "str | Path") -> None:
        """Bind the cache to path and load whatever is already stored there."""
        self._path = Path(path)
        self._store: dict = self._load()

    def _load(self) -> dict:
        """Read the backing file; fall back to an empty dict on any problem."""
        if not self._path.exists():
            return {}
        try:
            loaded = marshal.loads(self._path.read_bytes())
        except Exception:
            # Deliberate best effort: an unreadable or corrupt cache file is
            # simply discarded and rebuilt on the next write.
            return {}
        # A valid marshal payload that is not a dict (e.g. a list written by
        # some other tool) cannot back the key/value API, so ignore it too.
        return loaded if isinstance(loaded, dict) else {}

    def _save(self) -> None:
        """Serialize the whole store and overwrite the backing file."""
        self._path.write_bytes(marshal.dumps(self._store))

    def get(self, key: str, default: object = None) -> object:
        """Return the cached value for key, or default if absent."""
        return self._store.get(key, default)

    def set(self, key: str, value: object) -> None:
        """
        Store value under key and persist.

        If persisting fails (for instance ValueError from marshal because
        value is not marshal-compatible, or OSError from the write), the
        in-memory entry is rolled back and the exception is re-raised, so
        one bad value cannot break every later save.
        """
        missing = object()
        previous = self._store.get(key, missing)
        self._store[key] = value
        try:
            self._save()
        except Exception:
            if previous is missing:
                del self._store[key]
            else:
                self._store[key] = previous
            raise

    def delete(self, key: str) -> bool:
        """Remove key from the cache. Returns True if it existed."""
        if key in self._store:
            del self._store[key]
            self._save()
            return True
        return False

    def clear(self) -> None:
        """Remove all entries and persist the now-empty store."""
        self._store.clear()
        self._save()

    def keys(self) -> list[str]:
        """Return the cached keys as a new list, in insertion order."""
        return list(self._store.keys())

    def __len__(self) -> int:
        """Return the number of cached entries."""
        return len(self._store)

    def __contains__(self, key: str) -> bool:
        """Return True if key is present in the cache."""
        return key in self._store
# ─────────────────────────────────────────────────────────────────────────────
# 5. Bytecode file utilities
# ─────────────────────────────────────────────────────────────────────────────
def compile_to_bytes(
    source: str,
    filename: str = "<string>",
    optimize: int = 0,
) -> bytes:
    """
    Compile source in "exec" mode and return the marshalled code object.

    The result is the payload part of a .pyc file, i.e. without the
    16-byte header. filename is recorded in the code object and optimize
    is forwarded to compile(). Compilation errors propagate unchanged.

    Example:
        blob = compile_to_bytes("x = 42")
        namespace = {}
        exec(marshal.loads(blob), namespace)
        print(namespace["x"])  # 42
    """
    return marshal.dumps(compile(source, filename, "exec", optimize=optimize))
def write_pyc(
    source: str,
    dest: "str | Path",
    filename: str = "<string>",
    mtime: int = 0,
    source_size: int = 0,
    optimize: int = 0,
) -> Path:
    """
    Write a minimal .pyc file with a timestamp-based header to dest.
    Useful for build tools and test fixtures.

    Layout: importlib.util.MAGIC_NUMBER, a zero flags word, mtime and
    source_size as little-endian unsigned 32-bit integers, then the
    marshalled code object produced by compile_to_bytes.

    mtime and source_size are truncated to int and masked to 32 bits, so
    float timestamps such as os.stat().st_mtime and values beyond the
    uint32 range are stored modulo 2**32 instead of raising struct.error.

    Args:
        source: Python source text to compile.
        dest: path of the .pyc file to create or overwrite.
        filename: file name recorded inside the code object.
        mtime: source modification time to record in the header.
        source_size: source size in bytes to record in the header.
        optimize: optimization level forwarded to compile().

    Returns:
        dest as a Path.

    Example:
        path = write_pyc("answer = 42", "/tmp/answer.pyc", filename="answer.py")
        hdr, code = read_pyc(path)
        print(hdr.valid, code.co_filename)
    """
    code_bytes = compile_to_bytes(source, filename, optimize)
    header = importlib.util.MAGIC_NUMBER + struct.pack(
        "<III",
        0,                                 # flags = 0 (timestamp mode)
        int(mtime) & 0xFFFFFFFF,           # mtime, reduced to 32 bits
        int(source_size) & 0xFFFFFFFF,     # source size, reduced to 32 bits
    )
    dest = Path(dest)
    dest.write_bytes(header + code_bytes)
    return dest
def list_pyc_files(root: "str | Path", recursive: bool = True) -> list[Path]:
    """
    Collect the .pyc files found under root, sorted by path.

    With recursive=True (the default) sub-directories are searched as well;
    with recursive=False only entries directly inside root are considered.

    Example:
        for p in list_pyc_files("src"):
            hdr, _ = read_pyc(p)
            status = "OK" if hdr.valid else "STALE"
            print(f" {p} {status}")
    """
    base = Path(root)
    if recursive:
        matches = base.glob("**/*.pyc")
    else:
        matches = base.glob("*.pyc")
    return sorted(matches)
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-contained walkthrough of every helper in this module; all files
    # it creates live in temporary directories that are removed afterwards.
    import py_compile
    import tempfile

    print("=== marshal demo ===")

    # Round-trip a sample of each basic supported type.
    print("\n--- marshal_roundtrip ---")
    samples = (
        None, True, 42, 3.14, "hello", b"bytes",
        (1, 2, 3), [4, 5, 6], {"a": 1}, frozenset({7, 8}),
    )
    for sample in samples:
        restored = marshal_roundtrip(sample)
        matches = restored == sample
        print(f" {type(sample).__name__:12s} {'OK' if matches else 'MISMATCH'}")

    # Code objects survive dumps/loads as well.
    print("\n--- code object round-trip ---")
    greet_source = "def greet(name):\n return f'Hello, {name}!'\n"
    original_code = compile(greet_source, "<demo>", "exec")
    restored_code = marshal.loads(marshal.dumps(original_code))
    print(f" original: {original_code.co_filename} consts={original_code.co_consts}")
    print(f" roundtrip: {restored_code.co_filename} consts={restored_code.co_consts}")
    print(f" equal: {original_code == restored_code}")

    # Sizes for values marshal accepts, and a report for one it rejects.
    print("\n--- marshal_size / is_marshalable ---")
    for candidate in ["hello", [1] * 100, {"k": "v"}, object()]:
        try:
            n_bytes = marshal_size(candidate)
            print(f" {type(candidate).__name__:10s} marshalable=True size={n_bytes}")
        except (ValueError, TypeError):
            print(f" {type(candidate).__name__:10s} marshalable=False")

    # Byte-compile a real source file and parse the resulting .pyc.
    print("\n--- read_pyc ---")
    with tempfile.TemporaryDirectory() as workdir:
        sample_py = Path(workdir) / "sample.py"
        sample_py.write_text("answer = 42\nprint(answer)\n")
        compiled_path = py_compile.compile(str(sample_py), doraise=True)
        header, module_code = read_pyc(compiled_path)
        print(f" {header}")
        print(f" code: {module_code.co_filename} names={module_code.co_names}")

    # Summarize a module that contains one nested function.
    print("\n--- CodeInfo ---")
    add_source = "def add(a, b):\n return a + b\nresult = add(1, 2)\n"
    print(CodeInfo.from_code(compile(add_source, "<module>", "exec")))

    # Write a .pyc by hand, read it back and execute its payload.
    print("\n--- write_pyc ---")
    with tempfile.TemporaryDirectory() as workdir:
        hand_pyc = Path(workdir) / "hand.pyc"
        write_pyc("x = 99", hand_pyc, filename="hand.py")
        hand_header, hand_code = read_pyc(hand_pyc)
        print(f" header valid: {hand_header.valid}")
        print(f" code names: {hand_code.co_names}")
        namespace: dict = {}
        exec(hand_code, namespace)
        print(f" exec result: x = {namespace['x']}")

    # Persist two values, then reopen the same file with a second instance.
    print("\n--- MarshalCache ---")
    with tempfile.TemporaryDirectory() as workdir:
        cache_file = Path(workdir) / "cache.bin"
        writer = MarshalCache(cache_file)
        writer.set("primes", [2, 3, 5, 7, 11])
        writer.set("pi", 3.14159)
        # reload from disk
        reader = MarshalCache(cache_file)
        print(f" primes: {reader.get('primes')}")
        print(f" pi: {reader.get('pi')}")
        print(f" keys: {reader.keys()}")
        reader.delete("pi")
        print(f" after delete: {reader.keys()}")

    # Show only the head of the disassembly listing.
    print("\n--- disassemble_source (first 6 lines) ---")
    listing = disassemble_source("x = 1 + 2\ny = x * 3")
    for text_line in listing.splitlines()[:6]:
        print(f" {text_line}")

    print("\n=== done ===")
For the pickle alternative — pickle.dumps(obj) / pickle.loads(data) supports arbitrary class instances and importable module-level functions (pickled by reference), though not generators or lambdas; marshal only handles a fixed set of built-in types plus code objects — use pickle for general-purpose object serialization; use marshal only when you are working with .pyc bytecode files, code objects from compile(), or need a fast serializer for primitive types that never invokes __reduce__ hooks — keeping in mind that marshal is not hardened against malformed or malicious input either, so only unmarshal data you trust. For the struct + raw bytes alternative — struct.pack / struct.unpack give explicit layout control for binary formats — use struct when you are defining your own binary protocol with specific field widths and endianness; use marshal when the entire payload is a Python value (especially a code object) that you want CPython to interpret directly. The Claude Skills 360 bundle includes marshal skill sets covering marshal_roundtrip()/is_marshalable()/marshal_size() round-trip helpers, PycHeader + read_pyc() bytecode file parser, pyc_is_current() staleness checker, CodeInfo.from_code() code object inspector, disassemble_source() dis integration, compile_to_bytes()/write_pyc()/list_pyc_files() bytecode file utilities, and MarshalCache file-backed primitive cache. Start with the free tier to try bytecode serialization patterns and marshal pipeline code generation.