cbor2 encodes Python objects to CBOR (Concise Binary Object Representation) — a binary data format designed by the IETF. pip install cbor2. Encode: import cbor2; data = cbor2.dumps({"key": "value", "n": 42}). Decode: cbor2.loads(data). File: cbor2.dump(obj, fh). cbor2.load(fh). Native types: datetime.datetime → CBOR tag 1 timestamp; datetime.date → tag 1 date string; decimal.Decimal → tag 4; uuid.UUID → tag 37; fractions.Fraction → tag 30; IPv4Address → tag 52; set | frozenset → tag 258; re.Pattern → tag 21065. Timezone: CBOREncoder(timezone=datetime.timezone.utc) — encode naive datetimes as UTC. Canonical: CBOREncoder(canonical=True) — deterministic byte output (sorted keys). Object hook: def hook(decoder, obj, shareable_index=None): .... CBORDecoder(object_hook=hook). Tag: cbor2.CBORTag(0, "2013-03-21T20:04:00Z") — raw tagged value. tag.tag → int. tag.value. Indef: encoder.encode_length(1, None) for indefinite-length arrays. Shared: CBOREncoder(value_sharing=True) — use references for repeated objects. Float: fpxx prefix handling. Byte string: bytes → bstr. Text: str → tstr. cbor2.shareable_encoder. Diagnostic: cbor2 has no built-in diagnostics; use cbor2.CBORSimpleValue. Claude Code generates cbor2 binary protocols, IoT message serializers, and deterministic encoding pipelines.
CLAUDE.md for cbor2
## cbor2 Stack
- Version: cbor2 >= 5.6 | pip install cbor2
- Encode: cbor2.dumps(obj) → bytes | cbor2.dump(obj, file)
- Decode: cbor2.loads(data) → Python object | cbor2.load(file)
- Native: datetime UTC → tag1; UUID → tag37; Decimal → tag4; IPv4 → tag52; set → tag258
- Canonical: CBOREncoder(canonical=True) — deterministic sorted-key output
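- Custom: CBOREncoder(default=fn) to encode unsupported types | CBORDecoder(tag_hook=fn) to decode unrecognized tags (object_hook=fn post-processes decoded maps) | CBORTag(n, value) for custom tags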
- Timezone: CBOREncoder(timezone=utc) — encode naive datetimes as timezone-aware
cbor2 Binary Encoding Pipeline
# app/cbor_utils.py — cbor2 serialization, custom types, canonical encoding, and file I/O
from __future__ import annotations
import datetime
import decimal
import fractions
import ipaddress
import io
import re
import uuid
from pathlib import Path
from typing import Any, Callable
import cbor2
from cbor2 import CBORDecoder, CBOREncoder, CBORTag
# ─────────────────────────────────────────────────────────────────────────────
# 1. Core helpers
# ─────────────────────────────────────────────────────────────────────────────
# Shared UTC tzinfo; passed as the `timezone` option to every cbor2 encode call in this module.
UTC = datetime.timezone.utc
def encode(obj: Any, canonical: bool = False, timezone: datetime.timezone = UTC) -> bytes:
    """
    Serialize *obj* into CBOR bytes with ``cbor2.dumps``.

    canonical: forwarded to cbor2; True requests deterministic output with
        sorted map keys, which is what you want before signing or hashing.
    timezone: forwarded to cbor2 as the encoder's ``timezone`` option
        (defaults to the module-level UTC).
    """
    encoder_options = {"canonical": canonical, "timezone": timezone}
    return cbor2.dumps(obj, **encoder_options)
def decode(data: bytes) -> Any:
    """Deserialize a CBOR byte string and return the resulting Python object."""
    decoded = cbor2.loads(data)
    return decoded
def encode_str(obj: Any, **kwargs) -> str:
    """
    Return the CBOR encoding of *obj* as a hex string — handy when debugging.

    Keyword arguments are passed straight through to ``encode``.
    """
    raw = encode(obj, **kwargs)
    return raw.hex()
def round_trip(obj: Any, **kwargs) -> Any:
    """
    Push *obj* through ``encode`` and straight back through ``decode``.

    Useful for normalization: the result is whatever the decoder produces for
    the encoded form. Keyword arguments are forwarded to ``encode`` only.
    """
    wire_bytes = encode(obj, **kwargs)
    return decode(wire_bytes)
# ─────────────────────────────────────────────────────────────────────────────
# 2. File I/O
# ─────────────────────────────────────────────────────────────────────────────
def encode_to_file(obj: Any, path: str | Path, canonical: bool = False) -> None:
    """
    Serialize *obj* as a single CBOR item into the binary file at *path*.

    The file is opened in "wb" mode, so existing content is replaced.
    canonical is forwarded to cbor2; the encoder timezone is always UTC.
    """
    target = str(path)
    with open(target, "wb") as sink:
        cbor2.dump(obj, sink, timezone=UTC, canonical=canonical)
def decode_from_file(path: str | Path) -> Any:
    """Open the binary file at *path* and return the first CBOR item decoded from it."""
    source_name = str(path)
    with open(source_name, "rb") as source:
        item = cbor2.load(source)
    return item
def encode_records_to_file(records: list[Any], path: str | Path) -> int:
    """
    Write all records to *path* as back-to-back CBOR items.

    Each record is encoded independently with ``timezone=UTC`` and the
    encodings are concatenated in order. The file is created or overwritten:
    existing content is replaced, not appended to.

    Returns the number of bytes written (0 for an empty record list).
    """
    # b"".join builds the payload in one pass; no intermediate stream needed.
    payload = b"".join(cbor2.dumps(record, timezone=UTC) for record in records)
    Path(path).write_bytes(payload)
    return len(payload)
def decode_records_from_file(path: str | Path) -> list[Any]:
    """
    Read *path* fully and decode every concatenated CBOR item it contains.

    Decoding stops at end of data. If the data ends in the middle of an item
    (cbor2 raises ``CBORDecodeEOF``), the items decoded so far are returned.
    """
    raw = Path(path).read_bytes()
    total = len(raw)
    stream = io.BytesIO(raw)
    items = []
    while stream.tell() < total:
        try:
            item = cbor2.load(stream)
        except cbor2.CBORDecodeEOF:
            # Truncated trailing item: keep what was read successfully.
            break
        items.append(item)
    return items
# ─────────────────────────────────────────────────────────────────────────────
# 3. Native type demonstration
# ─────────────────────────────────────────────────────────────────────────────
def encode_rich_types() -> bytes:
    """
    Encode one sample dict holding a value of each rich type this demo covers.

    The sample contains an aware datetime, a date, a UUID, a Decimal, a
    Fraction, IPv4 and IPv6 addresses, a frozenset and a compiled regex.
    Output differs between calls because it uses the current time and a
    random UUID.
    """
    sample = {
        "timestamp": datetime.datetime.now(tz=UTC),
        "date_only": datetime.date.today(),
        "uid": uuid.uuid4(),
        "price": decimal.Decimal("19.99"),
        "ratio": fractions.Fraction(1, 3),
        "ip": ipaddress.IPv4Address("192.168.1.1"),
        "ipv6": ipaddress.IPv6Address("::1"),
        "tags": frozenset(["python", "cbor"]),
        "pattern": re.compile(r"\d+"),
    }
    return encode(sample)
def size_comparison(obj: Any) -> dict[str, int]:
    """
    Measure how large *obj* is as CBOR versus as UTF-8 JSON.

    JSON is produced with ``default=str``, so values JSON cannot represent
    are stringified. Returns the two byte counts under "cbor" and "json",
    plus "ratio" = cbor / json rounded to three decimals.
    """
    import json

    cbor_bytes = encode(obj)
    json_bytes = json.dumps(obj, default=str).encode()
    n_cbor, n_json = len(cbor_bytes), len(json_bytes)
    return {"cbor": n_cbor, "json": n_json, "ratio": round(n_cbor / n_json, 3)}
# ─────────────────────────────────────────────────────────────────────────────
# 4. Custom tags (extension types)
# ─────────────────────────────────────────────────────────────────────────────
# Custom tag numbers (use 65000+ to avoid IANA-registered range)
# Payload shapes below are the ones default_encoder_fn produces for each tag.
TAG_PYTHON_COMPLEX = 65000  # payload: [real, imag]
TAG_PYTHON_BYTEARRAY = 65001  # payload: the bytearray's contents as bytes
TAG_PYTHON_EXCEPTION = 65002  # payload: {"type": exception class name, "msg": str(exc)}
def default_encoder_fn(encoder: CBOREncoder, value: Any) -> None:
    """
    Encoder callback that wraps a few extra Python types in custom CBOR tags.

    complex   -> TAG_PYTHON_COMPLEX with [real, imag]
    bytearray -> TAG_PYTHON_BYTEARRAY with the contents as bytes
    Exception -> TAG_PYTHON_EXCEPTION with {"type": class name, "msg": str(value)}

    The tagged value is written through ``encoder.encode``.
    Raises cbor2.CBOREncodeTypeError for any other type.
    """
    if isinstance(value, complex):
        tagged = CBORTag(TAG_PYTHON_COMPLEX, [value.real, value.imag])
    elif isinstance(value, bytearray):
        tagged = CBORTag(TAG_PYTHON_BYTEARRAY, bytes(value))
    elif isinstance(value, Exception):
        payload = {"type": type(value).__name__, "msg": str(value)}
        tagged = CBORTag(TAG_PYTHON_EXCEPTION, payload)
    else:
        raise cbor2.CBOREncodeTypeError(f"Cannot serialize type {type(value)!r}")
    encoder.encode(tagged)
def object_hook_fn(decoder: CBORDecoder, value: Any, shareable_index: int = None) -> Any:
    """
    Decoder callback that turns this module's custom CBORTag values back into
    Python objects.

    TAG_PYTHON_COMPLEX   -> complex(real, imag)
    TAG_PYTHON_BYTEARRAY -> bytearray of the payload
    TAG_PYTHON_EXCEPTION -> RuntimeError carrying the payload's "msg"; the
                            stored "type" name is not used to rebuild the class

    Anything else, including CBORTag values with other tag numbers, is
    returned unchanged. ``decoder`` and ``shareable_index`` are unused.
    """
    if not isinstance(value, CBORTag):
        return value

    tag_number = value.tag
    payload = value.value
    if tag_number == TAG_PYTHON_COMPLEX:
        real_part, imag_part = payload
        return complex(real_part, imag_part)
    if tag_number == TAG_PYTHON_BYTEARRAY:
        return bytearray(payload)
    if tag_number == TAG_PYTHON_EXCEPTION:
        return RuntimeError(payload.get("msg", ""))
    return value
def encode_custom(obj: Any, canonical: bool = False) -> bytes:
    """
    Encode *obj* to CBOR with support for the custom types in default_encoder_fn.

    cbor2's extension point for unsupported types is the encoder's ``default``
    callback: it is invoked as ``default(encoder, value)`` for any value cbor2
    has no built-in encoder for. The previous implementation passed an
    ``encoders`` keyword and used the encoder as a context manager, neither of
    which CBOREncoder supports.

    canonical: forwarded to cbor2 for deterministic output.
    Returns the encoded bytes. The encoder timezone is always UTC.

    NOTE(review): cbor2 appears to encode ``bytearray`` natively as a byte
    string, in which case the bytearray branch of default_encoder_fn is never
    reached and bytearrays decode as ``bytes`` — confirm against the installed
    cbor2 version.
    """
    return cbor2.dumps(
        obj,
        default=default_encoder_fn,
        canonical=canonical,
        timezone=UTC,
    )
def decode_custom(data: bytes) -> Any:
    """
    Decode CBOR bytes, converting this module's custom tags via object_hook_fn.

    The callback is registered as ``tag_hook``, which cbor2 calls for tags it
    has no built-in decoder for. ``object_hook`` is only invoked for decoded
    maps, so registering the callback there would leave the custom values as
    raw CBORTag objects.
    """
    return cbor2.loads(data, tag_hook=object_hook_fn)
# ─────────────────────────────────────────────────────────────────────────────
# 5. Canonical / deterministic encoding for hashing and signing
# ─────────────────────────────────────────────────────────────────────────────
def canonical_encode(obj: Any) -> bytes:
    """
    Encode *obj* with cbor2's canonical mode and a UTC encoder timezone.

    Canonical mode sorts map keys, so equal inputs yield identical bytes.
    That makes the output suitable for content-addressable storage,
    signatures and deduplication.
    """
    canonical_options = {"canonical": True, "timezone": UTC}
    return cbor2.dumps(obj, **canonical_options)
def content_hash(obj: Any) -> str:
    """Return the SHA-256 hex digest of ``canonical_encode(obj)`` — a stable content key."""
    import hashlib

    hasher = hashlib.sha256()
    hasher.update(canonical_encode(obj))
    return hasher.hexdigest()
def deterministic_id(obj: Any) -> str:
    """Return a compact stable ID: the first 16 hex characters of ``content_hash(obj)``."""
    full_digest = content_hash(obj)
    return full_digest[:16]
# ─────────────────────────────────────────────────────────────────────────────
# 6. Value sharing (circular / repeated objects)
# ─────────────────────────────────────────────────────────────────────────────
def encode_with_sharing(obj: Any) -> bytes:
    """
    Encode *obj* with cbor2's ``value_sharing`` option switched on.

    With value sharing, an object that occurs more than once is emitted as a
    reference instead of being serialized again. This suits graphs and
    structures with shared mutable nodes. The encoder timezone is UTC.
    """
    return cbor2.dumps(obj, value_sharing=True, timezone=UTC)
def decode_with_sharing(data: bytes) -> Any:
    """
    Decode data produced by encode_with_sharing.

    No decoder option is needed: cbor2 resolves shared-value references
    while decoding. ``value_sharing`` is an encoder-only option, and passing
    it to ``cbor2.load`` raises TypeError.
    """
    return cbor2.loads(data)
# ─────────────────────────────────────────────────────────────────────────────
# 7. IoT / binary protocol helpers
# ─────────────────────────────────────────────────────────────────────────────
def make_sensor_message(
    device_id: str,
    readings: dict[str, float],
    timestamp: datetime.datetime | None = None,
) -> bytes:
    """
    Build a compact sensor message and return its canonical CBOR encoding.

    The message is a map with short keys: "d" (device id), "t" (timestamp)
    and "r" (readings). When *timestamp* is not supplied, the current UTC
    time is used.
    """
    when = timestamp if timestamp else datetime.datetime.now(tz=UTC)
    return canonical_encode({"d": device_id, "t": when, "r": readings})
def parse_sensor_message(data: bytes) -> dict[str, Any]:
    """
    Decode a sensor message and map its short keys to descriptive names.

    "d" -> "device_id", "t" -> "timestamp", "r" -> "readings". A missing
    "d" or "t" yields None; a missing "r" yields an empty dict.
    """
    compact = decode(data)
    device = compact.get("d")
    stamp = compact.get("t")
    values = compact.get("r", {})
    return {"device_id": device, "timestamp": stamp, "readings": values}
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo script: exercises each helper group and prints the results.
    import json
    import os
    import tempfile

    print("=== Basic encode/decode ===")
    obj = {"name": "Alice", "scores": [98, 87, 92], "active": True}
    encoded = encode(obj)
    decoded = decode(encoded)
    json_bytes = json.dumps(obj).encode()
    print(f" CBOR: {len(encoded)} bytes | JSON: {len(json_bytes)} bytes")
    print(f" CBOR hex: {encoded.hex()}")
    print(f" Decoded: {decoded}")

    print("\n=== Native types ===")
    rich = decode(encode_rich_types())
    for k, v in rich.items():
        print(f" {k:<12} = {v!r}")

    print("\n=== Canonical + content hash ===")
    # Same mapping with different insertion order: the hashes should match.
    d1 = {"b": 2, "a": 1}
    d2 = {"a": 1, "b": 2}
    h1 = content_hash(d1)
    h2 = content_hash(d2)
    print(f" hash(b=2, a=1) = {h1[:16]}...")
    print(f" hash(a=1, b=2) = {h2[:16]}...")
    print(f" Equal: {h1 == h2}")

    print("\n=== Custom types (complex, bytearray) ===")
    custom_obj = {
        "complex_num": 3 + 4j,
        "raw_bytes": bytearray(b"\x01\x02\x03"),
    }
    data = encode_custom(custom_obj)
    restored = decode_custom(data)
    print(f" complex: {restored['complex_num']}")
    print(f" bytearray: {restored['raw_bytes']!r}")

    print("\n=== IoT sensor message ===")
    msg = make_sensor_message(
        "sensor-42",
        {"temp": 22.5, "humidity": 65.0, "pressure": 1013.2},
    )
    parsed = parse_sensor_message(msg)
    print(f" Size: {len(msg)} bytes")
    for k, v in parsed.items():
        print(f" {k}: {v}")

    print("\n=== File round-trip ===")
    # delete=False so the file can be reopened by name after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".cbor", delete=False) as tmp:
        path = tmp.name
    try:
        records = [{"id": i, "value": i * 1.5, "ts": datetime.datetime.now(UTC)} for i in range(5)]
        size = encode_records_to_file(records, path)
        loaded = decode_records_from_file(path)
    finally:
        # Remove the temp file even if the round-trip above raised.
        os.unlink(path)
    print(f" Wrote {len(records)} records ({size} bytes)")
    print(f" Read back: {len(loaded)}, first id={loaded[0]['id']}")
For the msgpack alternative — msgpack is compact and very fast, but has limited native type support (no datetime, UUID, or Decimal — you must define custom Ext codes); cbor2 natively handles datetime, UUID, Decimal, IPv4/IPv6, set/frozenset, Fraction, and regex without any extension code, making it less boilerplate for Python-to-Python or IoT protocols. For the json alternative — JSON is text-based, human-readable, and universally supported; CBOR is binary, typically 10–40% smaller, has a richer type system, and is standardized in RFC 7049 / 8949 — it’s used in CoAP, CBOR Object Signing and Encryption (COSE), WebAuthn, and FIDO2 protocols; use cbor2 when you need a binary protocol that’s standard-defined and supports rich Python types. The Claude Skills 360 bundle includes cbor2 skill sets covering cbor2.dumps()/loads(), encode()/decode() wrappers with canonical/timezone options, file encode_to_file()/decode_from_file(), encode_records_to_file()/decode_records_from_file() multi-record files, native type round-trip demo, canonical_encode() and content_hash() for deterministic IDs, CBORTag custom tags with encoder/decoder callbacks, encode_with_sharing() for circular structures, make_sensor_message()/parse_sensor_message() IoT compact protocol, and size_comparison() vs JSON. Start with the free tier to try CBOR binary serialization code generation.