msgpack serializes Python objects to compact binary MessagePack format — faster and smaller than JSON. pip install msgpack. Pack: import msgpack; data = msgpack.packb({"key": "value", "n": 42}). Unpack: msgpack.unpackb(data, raw=False). raw=False (default in 1.0+): decode msgpack strings to Python str. raw=True: keep them as bytes. use_bin_type=True (default): encode Python str as msgpack str, bytes as bin. strict_map_key=False: allow non-string dict keys. Custom types: def default(obj): if isinstance(obj, datetime): return {"__dt__": obj.isoformat()}; raise TypeError. def hook(d): if "__dt__" in d: return datetime.fromisoformat(d["__dt__"]); return d. msgpack.packb(obj, default=default); msgpack.unpackb(data, object_hook=hook). Packer class: packer = msgpack.Packer(); packer.pack(obj). Streaming unpack: unpacker = msgpack.Unpacker(raw=False); unpacker.feed(chunk); for obj in unpacker:. File: msgpack.pack(obj, fh); msgpack.unpack(fh, raw=False). List: sequences pack as msgpack array. Bytes: pack(b"data") → bin type. None: pack(None) → nil. Bool: pack(True|False). Int/float. Numpy: packb(arr.tolist()). Redis: r.set(key, packb(obj)); unpackb(r.get(key)). Speed: ~4× faster pack, ~2× faster unpack vs stdlib json. Claude Code generates msgpack serialization pipelines, binary cache layers, and inter-process message protocols.
CLAUDE.md for msgpack
## msgpack Stack
- Version: msgpack >= 1.0 | pip install msgpack
- Pack: msgpack.packb(obj, use_bin_type=True) → bytes
- Unpack: msgpack.unpackb(data, raw=False) → Python object
- Custom: packb(obj, default=fn) | unpackb(data, object_hook=fn)
- Streaming: Unpacker(raw=False).feed(chunk); for obj in unpacker:
- File: msgpack.pack(obj, fh) | msgpack.unpack(fh, raw=False)
- Types: str/bytes/int/float/bool/None/list/dict — all native; extend with default
msgpack Binary Serialization Pipeline
# app/serialization.py — msgpack pack/unpack, custom types, streaming, and cache
from __future__ import annotations
import datetime
import io
import struct
from pathlib import Path
from typing import Any
import msgpack
# ─────────────────────────────────────────────────────────────────────────────
# 1. Core helpers
# ─────────────────────────────────────────────────────────────────────────────
def pack(obj: Any, **kwargs) -> bytes:
    """
    Serialize *obj* to msgpack bytes.

    Always passes use_bin_type=True so Python ``bytes`` values are encoded
    as the msgpack bin type (and ``str`` as the msgpack str type).
    Remaining keyword arguments are forwarded to :func:`msgpack.packb`.
    """
    return msgpack.packb(obj, use_bin_type=True, **kwargs)
def unpack(data: bytes, **kwargs) -> Any:
    """
    Deserialize msgpack bytes back into a Python object.

    Always passes raw=False so msgpack strings come back as Python ``str``
    rather than ``bytes``. Remaining keyword arguments are forwarded to
    :func:`msgpack.unpackb`.
    """
    return msgpack.unpackb(data, raw=False, **kwargs)
def pack_to_file(obj: Any, path: str | Path) -> None:
    """Serialize *obj* and write the msgpack bytes to the file at *path*."""
    with Path(path).open("wb") as fh:
        msgpack.pack(obj, fh, use_bin_type=True)
def unpack_from_file(path: str | Path) -> Any:
    """Load and deserialize a single msgpack object from the file at *path*."""
    with Path(path).open("rb") as fh:
        return msgpack.unpack(fh, raw=False)
def pack_many(objects: list[Any]) -> bytes:
    """Pack each object and concatenate the encodings into one byte stream."""
    return b"".join(pack(obj) for obj in objects)
def unpack_many(data: bytes) -> list[Any]:
    """Decode every object in a concatenated msgpack byte stream."""
    stream = msgpack.Unpacker(raw=False)
    stream.feed(data)
    return list(stream)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Custom type extension
# ─────────────────────────────────────────────────────────────────────────────
# Type codes for msgpack Ext (application-defined codes; 0-127 are available).
EXT_DATETIME = 1  # payload: big-endian float64 POSIX timestamp (see _default)
EXT_DATE = 2  # payload: big-endian int32 days since 1970-01-01
EXT_BYTES_KEY = 3  # reserved; no encoder/decoder in this module uses it yet
def _default(obj: Any) -> Any:
    """
    Encoder hook for ``msgpack.packb(default=_default)``.

    Maps datetime.datetime → ExtType(EXT_DATETIME, float64 timestamp) and
    datetime.date → ExtType(EXT_DATE, int32 days since the epoch); raises
    TypeError for anything else so msgpack reports the unsupported type.

    NOTE(review): ``.timestamp()`` on a *naive* datetime is interpreted in
    local time — assumes callers pass tz-aware values; confirm.
    """
    # The datetime check must come first: datetime is a subclass of date.
    if isinstance(obj, datetime.datetime):
        payload = struct.pack(">d", obj.timestamp())
        return msgpack.ExtType(EXT_DATETIME, payload)
    if isinstance(obj, datetime.date):
        epoch = datetime.date(1970, 1, 1)
        return msgpack.ExtType(EXT_DATE, struct.pack(">i", (obj - epoch).days))
    raise TypeError(f"Unknown type: {type(obj)!r}")
def _ext_hook(code: int, data: bytes) -> Any:
    """
    Decoder hook for ``msgpack.unpackb(ext_hook=_ext_hook)``.

    Reverses :func:`_default`: EXT_DATETIME → tz-aware UTC datetime,
    EXT_DATE → date. Unknown codes pass through unchanged as ExtType.
    """
    if code == EXT_DATETIME:
        (ts,) = struct.unpack(">d", data)
        return datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc)
    if code == EXT_DATE:
        (days,) = struct.unpack(">i", data)
        return datetime.date(1970, 1, 1) + datetime.timedelta(days=days)
    return msgpack.ExtType(code, data)
def pack_extended(obj: Any) -> bytes:
    """Pack *obj*, encoding datetime/date values via the Ext hooks above."""
    return msgpack.packb(obj, use_bin_type=True, default=_default)
def unpack_extended(data: bytes) -> Any:
    """Unpack *data*, decoding the datetime/date Ext types back to objects."""
    return msgpack.unpackb(data, ext_hook=_ext_hook, raw=False)
def pack_typed(obj: Any, custom_default=None) -> bytes:
    """
    Pack with the built-in datetime/date extensions plus an optional
    user-supplied fallback encoder.

    The built-in encoder is tried first; when it raises TypeError and
    *custom_default* was supplied, the object is handed to that instead.
    Without a fallback, the TypeError propagates.
    """
    def _chain(o):
        try:
            return _default(o)
        except TypeError:
            if custom_default is None:
                raise
            return custom_default(o)
    return msgpack.packb(obj, default=_chain, use_bin_type=True)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Streaming (incremental) unpacker
# ─────────────────────────────────────────────────────────────────────────────
class StreamUnpacker:
    """
    Feed bytes incrementally into a msgpack Unpacker.

    Useful for network streams or chunked file reads.
    Usage:
        unpacker = StreamUnpacker()
        while chunk := socket.recv(4096):
            for obj in unpacker.feed(chunk):
                process(obj)
    """
    def __init__(self, ext_hook=None, raw: bool = False):
        # BUGFIX: msgpack.Unpacker validates ext_hook with callable() and
        # raises TypeError for None, so only forward it when supplied.
        extra = {} if ext_hook is None else {"ext_hook": ext_hook}
        self._u = msgpack.Unpacker(
            raw=raw,
            max_buffer_size=16 * 1024 * 1024,  # cap buffered input at 16 MiB
            **extra,
        )
    def feed(self, data: bytes) -> list[Any]:
        """Feed a chunk; return the list of fully-parsed objects."""
        self._u.feed(data)
        results = []
        try:
            while True:
                results.append(self._u.unpack())
        except msgpack.exceptions.OutOfData:
            # BUGFIX: unpack() signals "buffer exhausted" with OutOfData,
            # which is an UnpackException, not a ValueError — catching
            # UnpackValueError (= ValueError) never matched, so feed()
            # always crashed. Remaining partial bytes stay buffered until
            # the next feed() completes the object.
            pass
        return results
    def __iter__(self):
        return iter(self._u)
class StreamPacker:
    """
    Incrementally pack objects, one call per object, each returning bytes.
    Usage:
        packer = StreamPacker()
        for obj in objects:
            data = packer.pack(obj)
            socket.send(data)
    """
    def __init__(self, autoreset: bool = True):
        # use_bin_type=True: bytes → msgpack bin, str → msgpack str.
        self._packer = msgpack.Packer(use_bin_type=True, autoreset=autoreset)
    def pack(self, obj: Any) -> bytes:
        """Serialize one object and return its msgpack encoding."""
        return self._packer.pack(obj)
    def pack_array_header(self, n: int) -> bytes:
        """Emit only the header announcing an n-element msgpack array."""
        return self._packer.pack_array_header(n)
    def pack_map_header(self, n: int) -> bytes:
        """Emit only the header announcing an n-entry msgpack map."""
        return self._packer.pack_map_header(n)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Redis cache helpers
# ─────────────────────────────────────────────────────────────────────────────
class MsgpackRedisCache:
    """
    Thin Redis cache layer that stores values as msgpack bytes.

    Values are encoded with pack_extended(), so datetime/date values survive
    the round trip. Faster and smaller than JSON-based caches.
    Usage:
        cache = MsgpackRedisCache(redis_client)
        cache.set("user:42", user_dict, ttl=3600)
        user = cache.get("user:42")
    """
    def __init__(self, redis_client, key_prefix: str = ""):
        self._r = redis_client
        self._prefix = key_prefix
    def _key(self, key: str) -> str:
        """Apply the configured key prefix, if any."""
        return f"{self._prefix}{key}" if self._prefix else key
    def set(self, key: str, value: Any, ttl: int | None = None) -> None:
        """Store *value* under *key*; a truthy *ttl* sets expiry in seconds."""
        data = pack_extended(value)
        k = self._key(key)
        if ttl:
            self._r.setex(k, ttl, data)
        else:
            self._r.set(k, data)
    def get(self, key: str, default: Any = None) -> Any:
        """Return the cached value, or *default* on a miss."""
        data = self._r.get(self._key(key))
        if data is None:
            return default
        return unpack_extended(data)
    def delete(self, key: str) -> None:
        """Remove *key* from the cache (no-op if absent)."""
        self._r.delete(self._key(key))
    def mset(self, mapping: dict[str, Any], ttl: int | None = None) -> None:
        """Set multiple keys in a single pipeline round trip."""
        if not mapping:
            return  # nothing to do; skip the pipeline round trip
        pipe = self._r.pipeline()
        for key, value in mapping.items():
            data = pack_extended(value)
            k = self._key(key)
            if ttl:
                pipe.setex(k, ttl, data)
            else:
                pipe.set(k, data)
        pipe.execute()
    def mget(self, keys: list[str]) -> dict[str, Any]:
        """Fetch many keys at once; missing keys map to None."""
        # BUGFIX: the old splat call self._r.mget(*prefixed) crashed on an
        # empty key list (and Redis MGET requires at least one key); pass
        # the list form and short-circuit the empty case.
        if not keys:
            return {}
        values = self._r.mget([self._key(k) for k in keys])
        return {
            k: unpack_extended(v) if v is not None else None
            for k, v in zip(keys, values)
        }
# ─────────────────────────────────────────────────────────────────────────────
# 5. File archive helpers
# ─────────────────────────────────────────────────────────────────────────────
def write_records(path: str | Path, records: list[Any]) -> int:
    """
    Write a sequence of records to a binary msgpack file (concatenated).

    Returns the number of bytes written. The count is accumulated while
    writing instead of re-stat()ing the file afterwards — one fewer
    syscall, and immune to concurrent modification of the file.
    """
    packer = msgpack.Packer(use_bin_type=True)
    written = 0
    with open(str(path), "wb") as fh:
        for record in records:
            chunk = packer.pack(record)
            fh.write(chunk)
            written += len(chunk)
    return written
def read_records(path: str | Path, ext_hook=None) -> list[Any]:
    """
    Read all records from a concatenated msgpack file.

    *ext_hook*, when given, decodes ExtType values (e.g. pass _ext_hook).
    BUGFIX: msgpack.Unpacker validates ext_hook with callable() and raises
    TypeError for None, so the previous unconditional ext_hook=ext_hook
    made the default call read_records(path) crash; forward it only when
    the caller supplied one.
    """
    extra = {} if ext_hook is None else {"ext_hook": ext_hook}
    with open(str(path), "rb") as fh:
        unpacker = msgpack.Unpacker(file_like=fh, raw=False, **extra)
        return list(unpacker)
def stream_records(path: str | Path, chunk_size: int = 65536):
    """
    Generator that yields records from a large msgpack file.

    Memory-efficient alternative to read_records(): records are decoded
    lazily as the file is consumed.
    BUGFIX: chunk_size was accepted but never used; it is now wired to
    Unpacker's read_size so the file is pulled in fh.read(chunk_size) calls.
    """
    with open(str(path), "rb") as fh:
        unpacker = msgpack.Unpacker(file_like=fh, raw=False, read_size=chunk_size)
        yield from unpacker
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo / smoke test: run this module directly to exercise every helper.
    import json
    import time
    # --- Round trip plus size comparison against stdlib json ---------------
    print("=== Basic pack/unpack ===")
    data = {"name": "Alice", "scores": [98, 87, 92], "active": True, "ratio": 0.94}
    packed = pack(data)
    unpacked = unpack(packed)
    json_bytes = json.dumps(data).encode()
    print(f" msgpack: {len(packed)} bytes | JSON: {len(json_bytes)} bytes")
    print(f" Ratio: {len(packed)/len(json_bytes):.2f}x")
    print(f" Round-trip: {unpacked}")
    # --- datetime/date survive the round trip via the ExtType hooks --------
    print("\n=== Custom types (datetime) ===")
    rec = {
        "name": "event-1",
        "created_at": datetime.datetime.now(tz=datetime.timezone.utc),
        "date": datetime.date.today(),
    }
    packed_ext = pack_extended(rec)
    unpacked_ext = unpack_extended(packed_ext)
    print(f" created_at: {unpacked_ext['created_at']}")
    print(f" date: {unpacked_ext['date']}")
    # --- Incremental pack, then incremental unpack of the joined bytes -----
    print("\n=== Streaming packer ===")
    packer = StreamPacker()
    chunks = [packer.pack({"i": i, "val": i ** 2}) for i in range(5)]
    concatenated = b"".join(chunks)
    print(f" Packed {len(chunks)} objects into {len(concatenated)} bytes")
    unpacker = StreamUnpacker()
    objects = unpacker.feed(concatenated)
    print(f" Unpacked: {objects[:3]}...")
    # --- File archive round trip through a temp file ------------------------
    print("\n=== File archive ===")
    import tempfile
    with tempfile.NamedTemporaryFile(suffix=".msgpack", delete=False) as tmp:
        path = tmp.name  # only the name is needed; file is reopened below
    records = [{"id": i, "name": f"item-{i}", "value": i * 1.5} for i in range(100)]
    size = write_records(path, records)
    loaded = read_records(path)
    print(f" Wrote {len(records)} records to {size} bytes")
    print(f" Read back: {len(loaded)} records, first={loaded[0]}")
    import os; os.unlink(path)
    # --- Naive wall-clock comparison vs json (not a rigorous benchmark) ----
    print("\n=== Speed comparison ===")
    obj = {"key": "value", "n": 42, "data": list(range(100))}
    N = 10000
    t0 = time.perf_counter()
    for _ in range(N):
        json.dumps(obj).encode()
    json_t = time.perf_counter() - t0
    t0 = time.perf_counter()
    for _ in range(N):
        pack(obj)
    msg_t = time.perf_counter() - t0
    print(f" JSON pack {N}x: {json_t*1000:.1f}ms")
    print(f" msgpack {N}x: {msg_t*1000:.1f}ms ({json_t/msg_t:.1f}x faster)")
For the orjson alternative — orjson is a fast JSON library that stays in the JSON format (human-readable, widely supported); msgpack is a binary format that produces smaller payloads and is faster for pack+unpack when both ends speak msgpack — use orjson when your output must be human-readable or consumed by non-Python clients, msgpack when you control both ends and want binary efficiency. For the pickle alternative — pickle is Python-specific, can execute arbitrary code on load (a security risk), and is not portable across Python versions or languages; msgpack is language-agnostic, does not execute code on deserialization (though untrusted input should still be bounded with max_buffer_size to prevent resource exhaustion), and has official implementations in 50+ languages, making it suitable for cross-service protocols. The Claude Skills 360 bundle includes msgpack skill sets covering packb()/unpackb() with use_bin_type/raw, pack_to_file()/unpack_from_file(), pack_many()/unpack_many() concatenated streams, custom type extension with ExtType/default/ext_hook, datetime/date ext support, StreamPacker/StreamUnpacker incremental classes, MsgpackRedisCache set/get/mset/mget, write_records()/read_records()/stream_records() file archive, and speed comparison vs json. Start with the free tier to try binary serialization code generation.