dill extends Python’s pickle to serialize lambdas, closures, classes, and interpreter state. pip install dill. Dump: import dill; dill.dumps(obj) → bytes. Load: dill.loads(data). Lambda: dill.dumps(lambda x: x * 2) — works where pickle.dumps raises PicklingError (or AttributeError: Can't pickle local object, for functions defined inside another function). Closure: def make_adder(n): return lambda x: x + n; dill.dumps(make_adder(5)). Class instance: dill.dumps(MyClass()). Function: dill.dumps(my_function). File: with open("state.pkl","wb") as f: dill.dump(obj, f). Load file: with open("state.pkl","rb") as f: obj = dill.load(f). Copy: dill.copy(obj) — round-trip serialize/deserialize. Source: dill.source.getsource(fn). Detect: dill.detect.trace(obj) — show what will be pickled. Session: dill.dump_session("session.pkl") / dill.load_session("session.pkl"). Settings: dill.settings["recurse"] = True — enables recursive pickling. Protocol: dill.dumps(obj, protocol=dill.HIGHEST_PROTOCOL). Compatibility: dill.pickles(obj) → bool. Replace stdlib: import dill as pickle. Multiprocessing: patch with pathos.multiprocessing.Pool (uses dill internally). concurrent.futures: serialize task with dill.dumps(fn), deserialize in worker. Claude Code generates dill serialization pipelines, session savers, multiprocessing helpers, and lambda-safe task queues.
CLAUDE.md for dill
## dill Stack
- Version: dill >= 0.3.8 | pip install dill
- Serialize: dill.dumps(obj) / dill.loads(data) — handles lambda, closure, class
- File: dill.dump(obj, f) / dill.load(f) — context manager file I/O
- Copy: dill.copy(obj) — deep copy via serialization round-trip
- Session: dill.dump_session("file.pkl") / dill.load_session("file.pkl")
- Check: dill.pickles(obj) → True/False before attempting serialization
dill Extended Serialization Pipeline
# app/serialization.py — dill serialize, persist, session, multiprocessing, and helpers
from __future__ import annotations
import hashlib
import io
import logging
import os
import pickle
import time
from pathlib import Path
from typing import Any, Callable, TypeVar
import dill
log = logging.getLogger(__name__)
T = TypeVar("T")
# ─────────────────────────────────────────────────────────────────────────────
# 1. Core serialize / deserialize
# ─────────────────────────────────────────────────────────────────────────────
def serialize(
    obj: Any,
    protocol: int | None = None,
    recurse: bool = True,
) -> bytes:
    """
    Serialize any Python object to bytes using dill.

    Handles objects that stdlib pickle cannot:
      - lambda functions
      - closures and nested functions
      - class methods and static methods
      - generator functions
      - partial objects with complex args

    Args:
        obj: the object to serialize.
        protocol: dill.DEFAULT_PROTOCOL, dill.HIGHEST_PROTOCOL, or a
            specific int; None lets dill choose its default.
        recurse: if True, dill recursively pickles objects referenced by
            the target. This is passed per-call via the ``recurse=``
            keyword of ``dill.dumps`` instead of mutating the global
            ``dill.settings`` dict — the old mutate-and-restore approach
            was not thread-safe and could clobber concurrent callers'
            settings.

    Returns:
        The dill-pickled bytes.

    Example:
        data = serialize(lambda x: x ** 2)
        fn = deserialize(data)
        print(fn(4))  # 16
    """
    kw: dict[str, Any] = {}
    if protocol is not None:
        kw["protocol"] = protocol
    if recurse:
        # Equivalent to toggling dill.settings["recurse"], but side-effect
        # free: dill.dumps accepts recurse directly.
        kw["recurse"] = True
    return dill.dumps(obj, **kw)
def deserialize(data: bytes) -> Any:
    """Inverse of serialize(): rebuild an object from dill bytes.

    Accepts anything produced by serialize() or a raw dill.dumps() call.
    """
    obj = dill.loads(data)
    return obj
def can_serialize(obj: Any) -> bool:
    """
    Report whether dill can serialize *obj*, without raising.

    Thin wrapper around ``dill.pickles``. Handy as a pre-flight check:

        if can_serialize(my_fn):
            task_queue.put(serialize(my_fn))
        else:
            raise ValueError(f"Cannot serialize {my_fn}")
    """
    result = dill.pickles(obj)
    return result
def deep_copy(obj: T) -> T:
    """
    Clone *obj* through a dill serialize/deserialize round-trip.

    Works on objects that copy.deepcopy() rejects, such as lambdas
    and closures.
    """
    clone = dill.copy(obj)
    return clone
# ─────────────────────────────────────────────────────────────────────────────
# 2. File persistence
# ─────────────────────────────────────────────────────────────────────────────
def save(obj: Any, path: str | Path, atomic: bool = True) -> Path:
    """
    Save an object to a file using dill.

    atomic=True: write to a temp file first, then atomically swap it into
    place, so readers never observe a partially-written file.

    Fixes over the naive implementation:
      - the temp name is the FULL file name plus ".tmp"
        (``with_suffix(".tmp")`` mapped both ``model.pkl`` and
        ``model.json`` to the same ``model.tmp``, so concurrent saves to
        same-stem files could clobber each other);
      - ``Path.replace`` instead of ``Path.rename`` — rename raises on
        Windows when the target already exists, replace overwrites;
      - the temp file is deleted if the dump fails partway.

    Args:
        obj: object to persist (anything dill can pickle).
        path: destination file; parent directories are created.
        atomic: use the temp-file-and-swap strategy.

    Returns:
        The destination path as a ``Path``.

    Example:
        save(trained_model, "/models/v2.pkl")
        model = load("/models/v2.pkl")
    """
    p = Path(path)
    p.parent.mkdir(parents=True, exist_ok=True)
    if not atomic:
        with open(p, "wb") as f:
            dill.dump(obj, f)
        return p
    tmp = p.with_name(p.name + ".tmp")
    try:
        with open(tmp, "wb") as f:
            dill.dump(obj, f)
        tmp.replace(p)  # atomic on POSIX; overwrites existing target on Windows
    except BaseException:
        # Don't leave a half-written temp file behind.
        tmp.unlink(missing_ok=True)
        raise
    return p
def load(path: str | Path) -> Any:
    """Read back an object previously written with save() or dill.dump()."""
    with Path(path).open("rb") as fh:
        return dill.load(fh)
def save_if_changed(obj: Any, path: str | Path) -> tuple[Path, bool]:
    """
    Persist *obj* only when its dill byte-form differs from the last write.

    A sidecar ``<stem>.sha256`` file stores the digest of the previous
    write; when the current digest matches it, the disk write is skipped.
    Useful for caching model checkpoints without redundant disk writes.

    Returns:
        (path, was_written)
    """
    target = Path(path)
    digest_file = target.with_suffix(".sha256")
    payload = serialize(obj)
    digest = hashlib.sha256(payload).hexdigest()
    unchanged = (
        digest_file.exists()
        and digest_file.read_text().strip() == digest
    )
    if unchanged:
        return target, False
    save(obj, target)
    digest_file.write_text(digest)
    return target, True
# ─────────────────────────────────────────────────────────────────────────────
# 3. Session persistence
# ─────────────────────────────────────────────────────────────────────────────
def save_session(path: str | Path = "session.pkl", exclude: list[str] | None = None) -> Path:
    """
    Save the current interpreter session (the ``__main__`` globals) to a file.

    Useful for saving Jupyter notebook state or REPL sessions.

    Args:
        path: destination file.
        exclude: names to skip from ``__main__`` (e.g. large DataFrames you
            don't need). Excluded names are temporarily removed from
            ``__main__`` before calling ``dill.dump_session`` and restored
            afterwards, so the output file is ALWAYS a genuine session dump
            that ``load_session()`` / ``dill.load_session()`` can restore.
            (The previous implementation dumped a plain dict on the exclude
            path, producing a file load_session could not restore.)

    Usage (in script or notebook):
        # ... compute expensive things ...
        save_session("my_session.pkl")
        # Later, in a fresh Python session:
        load_session("my_session.pkl")
    """
    p = Path(path)
    if exclude:
        import __main__
        removed: dict[str, Any] = {}
        for name in exclude:
            if hasattr(__main__, name):
                removed[name] = getattr(__main__, name)
                delattr(__main__, name)
        try:
            dill.dump_session(str(p))
        finally:
            # Put excluded globals back even if the dump raises.
            for name, value in removed.items():
                setattr(__main__, name, value)
    else:
        dill.dump_session(str(p))
    log.info("Session saved to %s (%d bytes)", p, p.stat().st_size)
    return p
def load_session(path: str | Path) -> None:
    """Restore interpreter globals from a file written by save_session()."""
    source = str(path)
    dill.load_session(source)
    log.info("Session restored from %s", path)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Lambda / closure helpers
# ─────────────────────────────────────────────────────────────────────────────
def serialize_callable(fn: Callable) -> bytes:
    """
    Turn a callable (lambdas and closures included) into dill bytes.

    Raises:
        TypeError: if *fn* is not callable at all.
        ValueError: if dill cannot pickle it.

    Example:
        data = serialize_callable(lambda x: x.strip().lower())
        clean = deserialize(data)
        print(clean(" Hello ")) # "hello"
    """
    if callable(fn):
        if can_serialize(fn):
            return serialize(fn)
        raise ValueError(f"Cannot serialize callable: {fn!r}")
    raise TypeError(f"{fn!r} is not callable")
def make_serializable_partial(fn: Callable, *args: Any, **kwargs: Any) -> bytes:
    """
    Create a zero-arg callable from fn(*args, **kwargs) and serialize it.

    Useful for sending tasks over queues that can't transmit live functions.

    Args:
        fn: callable to defer; must be serializable by dill along with its
            arguments.
        *args, **kwargs: captured and applied when the worker calls the task.

    Raises:
        TypeError: if *fn* is not callable (consistent with
            serialize_callable).

    Example:
        task_bytes = make_serializable_partial(process_file, "/data/input.csv", encoding="utf-8")
        send_to_queue(task_bytes) # worker deserializes and calls it
    """
    if not callable(fn):
        raise TypeError(f"{fn!r} is not callable")

    def task():
        return fn(*args, **kwargs)

    # functools.partial and many builtin callables have no __name__;
    # the old unconditional `fn.__name__` raised AttributeError for them.
    task.__name__ = getattr(fn, "__name__", task.__name__)
    return serialize(task)
# ─────────────────────────────────────────────────────────────────────────────
# 5. Multiprocessing helpers
# ─────────────────────────────────────────────────────────────────────────────
def map_with_dill(
    fn: Callable,
    iterable: list,
    workers: int = 4,
    timeout: float | None = None,
) -> list:
    """
    Parallel map using multiprocessing with dill serialization.

    Use this when fn is a lambda or closure that stdlib Pool.map() can't
    pickle. Requires: pip install pathos. Falls back to sequential mapping
    if pathos is not installed.

    Args:
        fn: callable applied to each item.
        iterable: items to process (any iterable works; annotated list for
            backward compatibility).
        workers: pool size.
        timeout: max seconds to wait for all results (previously accepted
            but silently ignored). When set, raises
            multiprocessing.TimeoutError if the pool does not finish in time.

    Fixes over the previous version:
      - only the *import* sits inside try/except ImportError; before, an
        ImportError raised by fn inside a worker was swallowed and the
        whole batch silently re-ran sequentially;
      - timeout is actually honored via map_async(...).get(timeout).

    Example:
        results = map_with_dill(lambda x: x ** 2, range(100), workers=8)
    """
    try:
        from pathos.multiprocessing import Pool as DillPool
    except ImportError:
        log.warning("pathos not installed — falling back to sequential map")
        return [fn(item) for item in iterable]
    with DillPool(workers) as pool:
        if timeout is None:
            return pool.map(fn, iterable)
        # map_async + get() is the Pool API that accepts a timeout.
        return pool.map_async(fn, iterable).get(timeout)
# Reference snippet (NOT executed): shows how to bridge dill-serialized
# tasks into a stdlib ProcessPoolExecutor, whose default pickling cannot
# handle lambdas/closures. Kept as a string so importing this module has
# no side effects; copy it into worker code to use it.
CONCURRENT_FUTURES_EXAMPLE = '''
# Using dill to serialize tasks for concurrent.futures
import dill
from concurrent.futures import ProcessPoolExecutor
def _run_bytes(task_bytes: bytes) -> bytes:
"""Worker: deserialize and run a task, return serialized result."""
task_fn = dill.loads(task_bytes)
result = task_fn()
return dill.dumps(result)
def submit_lambda(executor, fn, *args, **kwargs):
"""Submit a lambda/closure to a ProcessPoolExecutor via dill."""
from app.serialization import make_serializable_partial
task_bytes = make_serializable_partial(fn, *args, **kwargs)
future = executor.submit(_run_bytes, task_bytes)
return future # call .result() → dill.loads(bytes) to get output
# Usage:
# with ProcessPoolExecutor(max_workers=4) as ex:
# fut = submit_lambda(ex, lambda x: x**2 + 1, 42)
# result = dill.loads(fut.result()) # 1765
'''
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test demo: exercises each helper in this module end to end.
    # (The unused `import socket` that used to sit in the can_serialize
    # section was removed — nothing here referenced it.)
    print("=== Lambda serialization ===")
    fn = lambda x: x ** 2 + 1  # noqa: E731 — the point is pickling a lambda
    data = serialize(fn)
    fn2 = deserialize(data)
    print(f" Original: fn(5) = {fn(5)}")
    print(f" Deserialized: fn(5) = {fn2(5)}")
    print(f" Serialized size: {len(data)} bytes")

    print("\n=== Closure serialization ===")

    def make_multiplier(n):
        return lambda x: x * n

    mult3 = make_multiplier(3)
    data2 = serialize(mult3)
    mult3_copy = deserialize(data2)
    print(f" mult3(7) = {mult3(7)}, copy(7) = {mult3_copy(7)}")

    print("\n=== File persistence ===")
    obj = {"model": lambda x: x * 0.8 + 10, "params": {"slope": 0.8, "intercept": 10}}
    p = save(obj, "/tmp/demo_model.pkl")
    loaded = load(p)
    print(f" Saved to: {p}")
    print(f" Prediction: {loaded['model'](50):.1f}")

    print("\n=== can_serialize checks ===")
    items = [
        lambda x: x, # lambda — dill handles
        lambda: print("hi"), # lambda — dill handles
        [1, 2, 3], # plain list — always works
        {"key": "val"}, # plain dict — always works
    ]
    for item in items:
        name = getattr(item, "__name__", repr(item)[:30])
        print(f" {name:30s}: pickles={can_serialize(item)}")

    print("\n=== make_serializable_partial ===")

    def multiply(a, b):
        return a * b

    task_bytes = make_serializable_partial(multiply, 6, b=7)
    task_fn = deserialize(task_bytes)
    print(f" Task result: {task_fn()}") # 42

    print("\n=== deep_copy ===")
    original = {"fn": lambda x: x + 100, "data": list(range(5))}
    copied = deep_copy(original)
    print(f" original fn(0) = {original['fn'](0)}")
    print(f" copy fn(0) = {copied['fn'](0)}")
    print(f" Same object? {original is copied}")
For the pickle (stdlib) alternative — Python’s built-in pickle handles basic objects (lists, dicts, class instances) but raises PicklingError for lambdas and AttributeError: Can't pickle local object for closures and nested functions; dill is a drop-in replacement (import dill as pickle) that extends pickling to virtually all Python objects — use stdlib pickle for simple configuration dicts, dill when you need to serialize functions, closures, or full interpreter sessions. For the cloudpickle alternative — cloudpickle focuses specifically on serializing Python functions and closures for distributed computing (Spark, Dask, Ray, concurrent.futures) with a tighter scope; dill covers a broader surface including session dumps, generator objects, and stack frames — use cloudpickle for distributed PySpark/Dask tasks, dill when you need session persistence or the pathos multiprocessing Pool. The Claude Skills 360 bundle includes dill skill sets covering serialize()/deserialize() with recurse and protocol options, can_serialize() safety check, deep_copy() via round-trip, save()/load() with atomic write, save_if_changed() hash-based caching, save_session()/load_session() interpreter persistence, serialize_callable() with validation, make_serializable_partial() for queue-safe tasks, map_with_dill() pathos Pool wrapper, and concurrent.futures dill bridge pattern. Start with the free tier to try extended Python serialization code generation.