collections provides specialized container types beyond dict, list, and tuple. from collections import Counter, defaultdict, deque, OrderedDict, namedtuple, ChainMap. Counter: c = Counter(iterable); c.most_common(10); c["x"]; c.update(more); c.subtract(other); +c; c1 + c2; c1 - c2; c1 & c2; c1 | c2. defaultdict: d = defaultdict(list); d["key"].append(val). defaultdict int: d = defaultdict(int); d["x"] += 1. defaultdict set: defaultdict(set). deque: dq = deque(maxlen=5); dq.appendleft(x); dq.append(x); dq.popleft(); dq.pop(); dq.rotate(n); dq.extendleft([...]). OrderedDict: od = OrderedDict(); od.move_to_end("k"); od.popitem(last=True). namedtuple: Point = namedtuple("Point", ["x","y"]); p = Point(1,2); p.x; p._asdict(); p._replace(y=5); Point._fields. typed namedtuple: from typing import NamedTuple; class Point(NamedTuple): x: float; y: float. ChainMap: cm = ChainMap(local_cfg, env_cfg, defaults); cm["key"]; cm.new_child({}); cm.parents. UserDict: class MyDict(UserDict): def __setitem__. heapq: import heapq; heapq.heappush(q, (priority, item)); heapq.heappop(q). deque as BFS queue: dq = deque([start]); dq.popleft(). sliding window: dq = deque(arr[:W], maxlen=W). Claude Code generates frequency analysis, graph BFS, LRU caches, config layers, and histogram pipelines.
CLAUDE.md for collections
## collections Stack
- Stdlib: from collections import Counter, defaultdict, deque, OrderedDict, namedtuple, ChainMap
- Frequency: Counter(iterable) | c.most_common(N) | c.update() | c1 + c2 | c1 - c2
- Group: defaultdict(list) | defaultdict(set) | defaultdict(int)
- Queue: deque(maxlen=N) | dq.appendleft() / dq.popleft() — O(1) both ends
- Record: NamedTuple class with type annotations (preferred over namedtuple factory)
- Config: ChainMap(overrides, env_vars, defaults) — first found wins
collections Data Structure Pipeline
# app/containers.py — Counter, defaultdict, deque, OrderedDict, NamedTuple, ChainMap
from __future__ import annotations

import heapq
from collections import (
    ChainMap,
    Counter,
    OrderedDict,
    UserDict,
    defaultdict,
    deque,
    namedtuple,
)
from typing import Any, Callable, Hashable, Iterable, Iterator, NamedTuple, TypeVar
T = TypeVar("T")
K = TypeVar("K", bound=Hashable)
# ─────────────────────────────────────────────────────────────────────────────
# 1. Counter recipes
# ─────────────────────────────────────────────────────────────────────────────
def word_frequency(text: str, top_n: int = 10) -> list[tuple[str, int]]:
    """Return the ``top_n`` most frequent words in *text*.

    Words are lowercased and split on whitespace; punctuation stays
    attached to its word. Ties keep first-seen order (Counter semantics).

    Example:
        word_frequency("the quick brown fox the fox", top_n=3)
        # [("the", 2), ("fox", 2), ("quick", 1)]
    """
    counts = Counter(text.lower().split())
    return counts.most_common(top_n)
def top_items(iterable: Iterable[T], n: int = 10) -> list[tuple[T, int]]:
    """Return the ``n`` most common elements with their counts.

    Example:
        top_items(["a","b","a","c","a","b"], n=2) # [("a",3),("b",2)]
    """
    counts: Counter = Counter()
    counts.update(iterable)
    return counts.most_common(n)
def frequency_diff(before: Iterable[T], after: Iterable[T]) -> Counter:
"""
Find what changed between two frequency distributions.
Positive = more in after; negative = fewer.
Example:
diff = frequency_diff(["a","b","b"], ["a","a","c"])
# Counter({"a": 1, "c": 1, "b": -2})
"""
c_before = Counter(before)
c_after = Counter(after)
diff = c_after.copy()
diff.subtract(c_before)
return diff
def histogram(values: Iterable[float], bins: int = 10) -> dict[str, int]:
    """
    Bucket numeric values into ``bins`` equal-width ranges.

    Args:
        values: Any iterable of numbers; consumed fully.
        bins: Number of equal-width buckets; must be >= 1.

    Returns:
        Mapping of "lo-hi" labels to counts, with most-populated
        buckets first (insertion order of the returned dict).

    Raises:
        ValueError: If ``bins`` < 1 (previously this crashed with an
            unhelpful ZeroDivisionError or IndexError).

    Example:
        h = histogram([1.2, 3.4, 1.1, 5.6, 3.3, 3.5], bins=3)
    """
    if bins < 1:
        raise ValueError(f"bins must be >= 1, got {bins}")
    data = list(values)
    if not data:
        return {}
    lo, hi = min(data), max(data)
    # Avoid zero-width bins when every value is identical.
    span = (hi - lo) or 1.0
    step = span / bins
    labels = [f"{lo + i*step:.2f}-{lo + (i+1)*step:.2f}" for i in range(bins)]
    counter: Counter = Counter()
    for v in data:
        # Clamp so v == hi lands in the last bin instead of overflowing.
        idx = min(int((v - lo) / step), bins - 1)
        counter[labels[idx]] += 1
    return dict(counter.most_common())
# ─────────────────────────────────────────────────────────────────────────────
# 2. defaultdict recipes
# ─────────────────────────────────────────────────────────────────────────────
def group_by(
    items: Iterable[T],
    key: Callable[[T], Any],
) -> dict[Any, list[T]]:
    """
    Group items by a key function, preserving encounter order.

    Args:
        items: Source items; consumed fully.
        key: Function mapping an item to its (hashable) group key.
            (Was annotated ``Any``; tightened to ``Callable`` so type
            checkers can catch non-callable arguments.)

    Returns:
        Plain dict of key -> list of items that produced that key.

    Example:
        groups = group_by(["cat","car","bus","bat"], key=lambda w: w[0])
        # {"c": ["cat","car"], "b": ["bus","bat"]}
    """
    result: defaultdict[Any, list[T]] = defaultdict(list)
    for item in items:
        result[key(item)].append(item)
    return dict(result)
def index_by(items: Iterable[T], key: Callable[[T], Any]) -> dict[Any, T]:
    """
    Build a dict keyed by ``key(item)``.

    On duplicate keys the last item wins, matching plain
    dict-comprehension semantics.

    Args:
        items: Source items; consumed fully.
        key: Function mapping an item to its (hashable) dict key.
            (Was annotated ``Any``; tightened to ``Callable``.)

    Example:
        users = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
        by_id = index_by(users, key=lambda u: u["id"])
    """
    return {key(item): item for item in items}
def multi_index(items: Iterable[T], *key_fns: Callable[[T], Any]) -> dict[Any, set[T]]:
    """
    Build an inverted index: key -> {items that produced that key}.

    Each key function may return a single key or a collection of keys
    (list/set/tuple), in which case the item is indexed under every key
    in the collection.  Strings count as single keys — they are not
    exploded character-by-character.

    Note: items are stored in sets, so they must be hashable — use
    tuples of tags, not lists.  (The previous docstring example used
    lists inside the items, which raises TypeError when added to a set.)

    Example:
        posts = [("p1", ("python", "api")), ("p2", ("python", "ui"))]
        idx = multi_index(posts, lambda p: p[1])
        # idx["python"] == {posts[0], posts[1]}
    """
    result: defaultdict[Any, set] = defaultdict(set)
    for item in items:
        for key_fn in key_fns:
            keys = key_fn(item)
            # Explode list/set/tuple key collections; anything else
            # (including str) is treated as one key.
            if isinstance(keys, (list, set, tuple)):
                for k in keys:
                    result[k].add(item)
            else:
                result[keys].add(item)
    return dict(result)
def running_total(items: Iterable[tuple[K, float]]) -> dict[K, float]:
    """Sum values per key over a stream of (key, amount) pairs.

    Example:
        running_total([("a", 1.0), ("b", 2.0), ("a", 3.0)])
        # {"a": 4.0, "b": 2.0}
    """
    totals: defaultdict[K, float] = defaultdict(float)
    for k, amount in items:
        totals[k] += amount
    return dict(totals)
# ─────────────────────────────────────────────────────────────────────────────
# 3. deque recipes
# ─────────────────────────────────────────────────────────────────────────────
def sliding_window(iterable: Iterable[T], n: int) -> Iterator[tuple]:
    """
    Yield overlapping windows of size ``n`` as tuples.

    Inputs shorter than ``n`` yield nothing.

    Args:
        iterable: Source items; consumed lazily.
        n: Window size; must be >= 1.

    Raises:
        ValueError: If ``n`` < 1, raised eagerly at call time.
            (Previously n=0 silently yielded an empty tuple per item
            and n<0 raised a confusing deque error on first iteration.)

    Example:
        list(sliding_window([1,2,3,4,5], 3)) # [(1,2,3),(2,3,4),(3,4,5)]
    """
    if n < 1:
        raise ValueError(f"window size must be >= 1, got {n}")
    return _sliding_window_impl(iterable, n)


def _sliding_window_impl(iterable: Iterable[T], n: int) -> Iterator[tuple]:
    # Generator body kept separate so sliding_window() validates
    # eagerly instead of deferring errors to the first next().
    window: deque = deque(maxlen=n)
    for item in iterable:
        window.append(item)
        if len(window) == n:
            yield tuple(window)
def moving_average(values: Iterable[float], window: int) -> Iterator[float]:
    """Yield the running mean over the last ``window`` samples.

    Emits nothing until the window is full, then one average per input.

    Example:
        list(moving_average([1,2,3,4,5], 3)) # [2.0, 3.0, 4.0]
    """
    buf: deque = deque(maxlen=window)
    acc = 0.0  # running sum of the samples currently in buf
    for sample in values:
        if len(buf) == window:
            acc -= buf[0]  # subtract the value about to be evicted
        buf.append(sample)
        acc += sample
        if len(buf) == window:
            yield acc / window
class LRUCache:
    """
    Bounded cache that evicts the least-recently-used entry.

    Backed by an OrderedDict whose ordering doubles as the recency
    order: front = oldest, back = most recently touched.

    Example:
        cache = LRUCache(capacity=3)
        cache.put("a", 1)
        cache.put("b", 2)
        cache.get("a")    # 1 — refreshes "a"
        cache.put("c", 3)
        cache.put("d", 4) # evicts "b" (least recent)
    """

    def __init__(self, capacity: int) -> None:
        self._cap = capacity
        self._cache: OrderedDict = OrderedDict()

    def get(self, key: Hashable) -> Any | None:
        """Return the cached value, or None on a miss; refreshes recency."""
        try:
            value = self._cache[key]
        except KeyError:
            return None
        self._cache.move_to_end(key)  # mark as most recently used
        return value

    def put(self, key: Hashable, value: Any) -> None:
        """Insert or overwrite a value, evicting the oldest entry when full."""
        if key in self._cache:
            self._cache.move_to_end(key)
        self._cache[key] = value
        while len(self._cache) > self._cap:
            self._cache.popitem(last=False)  # drop least-recently-used

    def __len__(self) -> int:
        return len(self._cache)
# ─────────────────────────────────────────────────────────────────────────────
# 4. NamedTuple records
# ─────────────────────────────────────────────────────────────────────────────
class Point2D(NamedTuple):
    """Immutable 2D point with Euclidean distance.

    Being a NamedTuple, instances are hashable, unpackable, and support
    the tuple protocol plus ``_asdict()`` / ``_replace()``.

    Example:
        p = Point2D(3.0, 4.0)
        q = p._replace(y=0.0)
        d = p._asdict() # {"x": 3.0, "y": 4.0}
    """

    x: float
    y: float

    def distance_to(self, other: Point2D) -> float:
        """Return the Euclidean distance to *other*."""
        dx = self.x - other.x
        dy = self.y - other.y
        return (dx ** 2 + dy ** 2) ** 0.5
class HTTPRequest(NamedTuple):
    """Lightweight HTTP request record.

    Immutable, hashable, and tuple-unpackable; fields are positional in
    the order declared below.
    """
    # HTTP verb, e.g. "GET" — presumably; no validation is performed
    method: str
    # Request path component
    path: str
    # Numeric status code — presumably the response status; TODO confirm with producer
    status: int
    # Byte count — NOTE(review): field name shadows the builtin `bytes`
    # within this class body; harmless here but avoid using `bytes` as a
    # type annotation on later fields in this class
    bytes: int
    # Duration in ms — presumably request latency; TODO confirm
    ms: float
class Metric(NamedTuple):
    """Monitoring metric record.

    Immutable sample; ``tags`` defaults to an empty tuple so instances
    stay hashable (a list default would break hashing).
    """
    # Metric identifier — format is producer-defined; TODO confirm convention
    name: str
    # Sampled numeric value
    value: float
    # Optional tags; a tuple (not a list) keeps the record hashable
    tags: tuple[str, ...] = ()
# ─────────────────────────────────────────────────────────────────────────────
# 5. ChainMap configuration
# ─────────────────────────────────────────────────────────────────────────────
def build_config(
    cli_args: dict[str, Any] | None = None,
    env_vars: dict[str, Any] | None = None,
    file_config: dict[str, Any] | None = None,
    defaults: dict[str, Any] | None = None,
) -> ChainMap:
    """
    Compose layered configuration: cli > env > file > defaults.

    The returned ChainMap resolves each key against the first layer
    that contains it; ``None`` layers are skipped entirely.

    Example:
        cfg = build_config(
            cli_args={"port": 9000},
            env_vars={"debug": True},
            defaults={"port": 8080, "debug": False, "log_level": "INFO"},
        )
        cfg["port"]      # 9000 (from cli)
        cfg["debug"]     # True (from env)
        cfg["log_level"] # "INFO" (from defaults)
    """
    sources = (cli_args, env_vars, file_config, defaults)
    return ChainMap(*(layer for layer in sources if layer is not None))
# ─────────────────────────────────────────────────────────────────────────────
# 6. Priority queue using heapq
# ─────────────────────────────────────────────────────────────────────────────
class PriorityQueue:
    """
    Min-heap priority queue (lower priority number = higher priority).

    Ties are broken FIFO via a monotonically increasing counter, which
    also prevents heapq from ever comparing the (possibly unorderable)
    items themselves.

    Example:
        pq = PriorityQueue()
        pq.push("low", priority=10)
        pq.push("high", priority=1)
        pq.push("med", priority=5)
        pq.pop()  # "high"
    """
    # NOTE: the previous docstring example pushed Task(...) objects as the
    # item while their priority silently defaulted to 0.0 — the priority
    # must be passed to push(), as shown above.

    def __init__(self) -> None:
        self._heap: list = []
        self._counter = 0  # FIFO tiebreaker; keeps items out of comparisons

    def push(self, item: Any, priority: float = 0.0) -> None:
        """Add *item*; lower ``priority`` values pop first."""
        heapq.heappush(self._heap, (priority, self._counter, item))
        self._counter += 1

    def pop(self) -> Any:
        """Remove and return the highest-priority item.

        Raises:
            IndexError: If the queue is empty (with a clear message,
                instead of heapq's bare "index out of range").
        """
        if not self._heap:
            raise IndexError("pop from an empty PriorityQueue")
        _, _, item = heapq.heappop(self._heap)
        return item

    def peek(self) -> Any:
        """Return (without removing) the highest-priority item.

        Raises:
            IndexError: If the queue is empty.
        """
        if not self._heap:
            raise IndexError("peek at an empty PriorityQueue")
        _, _, item = self._heap[0]
        return item

    def __len__(self) -> int:
        return len(self._heap)

    def __bool__(self) -> bool:
        return bool(self._heap)
# ─────────────────────────────────────────────────────────────────────────────
# 7. Validated UserDict
# ─────────────────────────────────────────────────────────────────────────────
class TypedDict(UserDict):
    """
    Dict that validates every write against a fixed type schema.

    Unknown keys raise KeyError; wrong-typed values raise TypeError.
    (Note: ``bool`` passes an ``int`` check because bool subclasses int.)

    Example:
        schema = {"port": int, "debug": bool, "name": str}
        cfg = TypedDict(schema)
        cfg["port"] = 8080   # OK
        cfg["port"] = "8080" # raises TypeError
        cfg["extra"] = True  # raises KeyError (unknown field)
    """

    def __init__(self, schema: dict[str, type]) -> None:
        super().__init__()
        self._schema = schema

    def __setitem__(self, key: str, value: Any) -> None:
        schema = self._schema
        if key not in schema:
            raise KeyError(f"Unknown field: {key!r}")
        expected = schema[key]
        if isinstance(value, expected):
            super().__setitem__(key, value)
            return
        raise TypeError(
            f"Field {key!r} expects {expected.__name__}, got {type(value).__name__}"
        )
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test every helper in this module and print the results.
    print("=== collections demo ===")

    print("\n--- word_frequency ---")
    corpus = "the quick brown fox jumps over the lazy dog the fox"
    print(f" top 4: {word_frequency(corpus, 4)}")

    print("\n--- frequency_diff ---")
    delta = frequency_diff(["a", "b", "b", "c"], ["a", "a", "c", "c", "d"])
    print(f" diff: {dict(delta)}")

    print("\n--- group_by ---")
    samples = ["cat", "car", "bus", "bat", "cup"]
    grouped = group_by(samples, key=lambda w: w[0])
    print(f" by first letter: {dict(grouped)}")

    print("\n--- running_total ---")
    ledger = [("north", 100.0), ("south", 200.0), ("north", 50.0), ("south", 75.0)]
    print(f" totals: {running_total(ledger)}")

    print("\n--- sliding_window ---")
    print(f" windows(3): {list(sliding_window([1,2,3,4,5], 3))}")

    print("\n--- moving_average ---")
    print(f" ma(3): {list(moving_average([1.0,2.0,3.0,4.0,5.0], 3))}")

    print("\n--- LRUCache ---")
    lru = LRUCache(capacity=3)
    for cache_key, cache_val in [("a", 1), ("b", 2), ("c", 3)]:
        lru.put(cache_key, cache_val)
    lru.get("a")     # touch "a" so it becomes most recent
    lru.put("d", 4)  # evicts "b"
    print(f" keys: {list(lru._cache.keys())} len={len(lru)}")

    print("\n--- Point2D ---")
    origin = Point2D(0.0, 0.0)
    corner = Point2D(3.0, 4.0)
    print(f" {corner} distance_to({origin}): {corner.distance_to(origin)}")
    print(f" _asdict: {corner._asdict()}")

    print("\n--- build_config (ChainMap) ---")
    cfg = build_config(
        cli_args={"port": 9000},
        env_vars={"debug": True},
        defaults={"port": 8080, "debug": False, "log_level": "INFO"},
    )
    print(f" port={cfg['port']} debug={cfg['debug']} log_level={cfg['log_level']}")

    print("\n--- PriorityQueue ---")
    queue = PriorityQueue()
    for label, pri in [("low", 10), ("high", 1), ("med", 5)]:
        queue.push(label, priority=pri)
    drained = []
    while queue:
        drained.append(queue.pop())
    print(f" dequeued: {drained}")

    print("\n--- TypedDict ---")
    spec = {"port": int, "debug": bool}
    typed_cfg = TypedDict(spec)
    typed_cfg["port"] = 8080
    typed_cfg["debug"] = True
    print(f" values: {dict(typed_cfg)}")
    try:
        typed_cfg["port"] = "wrong"
    except TypeError as err:
        print(f" TypeError: {err}")

    print("\n=== done ===")
For the more-itertools alternative — more-itertools (PyPI) extends itertools with 80+ recipes including chunked, windowed, flatten, pairwise, bucket, distribute, and many more; Python’s collections + itertools stdlib covers the fundamental patterns (sliding window via deque, grouping via defaultdict) — use more-itertools for expressive iteration pipelines with minimal code, stdlib collections and itertools when zero external dependencies are required or you need the underlying data structures (Counter, deque, ChainMap) not just iteration combinators. For the sortedcontainers alternative — sortedcontainers (PyPI) provides SortedList, SortedDict, and SortedSet that maintain sort order on every insert/delete in O(log n); Python’s stdlib collections does not maintain sorted order (though heapq provides a min-heap) — use sortedcontainers when you need to frequently insert and query by rank or range, stdlib collections.Counter + most_common() when you only need the top-N items after all insertions are done. The Claude Skills 360 bundle includes collections skill sets covering word_frequency()/frequency_diff()/histogram() Counter recipes, group_by()/index_by()/running_total() defaultdict patterns, sliding_window()/moving_average() deque helpers, LRUCache OrderedDict, Point2D/HTTPRequest/Metric NamedTuple records, build_config() ChainMap layering, PriorityQueue heapq wrapper, and TypedDict UserDict extension. Start with the free tier to try typed data structure and collections pipeline code generation.