Python’s bz2 module provides bzip2 compression — better ratios than zlib/gzip, faster than lzma, with a simpler API. import bz2. compress: bz2.compress(data, compresslevel=9) → bz2 bytes; level 1–9 (default 9). decompress: bz2.decompress(data) — fully decompress bz2 bytes in one call. bz2.open: with bz2.open("file.bz2", "rb") as f: → file-like object; "wt" for text write; compresslevel and encoding params. BZ2File: f = bz2.BZ2File("out.bz2", "w", compresslevel=6) — file wrapping. Incremental: c = bz2.BZ2Compressor(compresslevel=6); c.compress(chunk) + ... + c.flush(). BZ2Decompressor: d = bz2.BZ2Decompressor(); d.decompress(chunk) — stateful; d.eof → True when stream ends; d.needs_input → True if more input needed; d.unused_data → bytes after end of stream. Multi-stream: bz2 files may contain concatenated bzip2 streams; BZ2File handles them transparently; BZ2Decompressor only handles one stream — for multi-stream files, reset after each d.eof. Context manager: with bz2.BZ2File("f.bz2", "w") as f: — auto-closes. bz2.open("file.bz2", "rt", encoding="utf-8") — compressed text. Claude Code generates bz2 log rotators, tbz2 archive writers, streaming pipeline components, and multi-stream decoders.
CLAUDE.md for bz2
## bz2 Stack
- Stdlib: import bz2
- One-shot: data = bz2.compress(raw, 6); raw = bz2.decompress(data)
- File: with bz2.open("file.bz2", "rb") as f: data = f.read()
- Write: with bz2.open("out.bz2", "wb", compresslevel=6) as f: f.write(data)
- Stream: c = bz2.BZ2Compressor(6); out = b"".join(c.compress(chunk) for chunk in src) + c.flush()
bz2 Compression Pipeline
# app/bz2util.py — compress/decompress, file ops, streaming, multi-stream, bench
from __future__ import annotations
import bz2
import io
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Generator, Iterator
# ─────────────────────────────────────────────────────────────────────────────
# 1. One-shot helpers
# ─────────────────────────────────────────────────────────────────────────────
def compress(data: bytes, level: int = 9) -> bytes:
    """
    bzip2-compress *data* at the requested level (1 = fastest, 9 = best ratio).

    Example:
        compressed = compress(large_bytes, level=6)
    """
    # One-shot helper: the whole payload is compressed in a single call.
    return bz2.compress(data, compresslevel=level)
def decompress(data: bytes) -> bytes:
    """
    Fully decompress a bzip2 payload in one call.

    Example:
        original = decompress(bz2_bytes)
    """
    # One-shot counterpart to compress(); no incremental state involved.
    return bz2.decompress(data)
def compress_ratio(data: bytes, level: int = 9) -> float:
    """
    Return the original/compressed size ratio for *data* at *level*.

    Example:
        print(f"{compress_ratio(data):.2f}x")
    """
    # Inline one-shot compression (equivalent to the module's compress()).
    packed = bz2.compress(data, compresslevel=level)
    # A bz2 stream is never empty in practice; the guard keeps us safe anyway.
    if not packed:
        return 0.0
    return len(data) / len(packed)
# ─────────────────────────────────────────────────────────────────────────────
# 2. File operations
# ─────────────────────────────────────────────────────────────────────────────
def compress_file(
    src: str | Path,
    dest: str | Path | None = None,
    level: int = 9,
    remove_src: bool = False,
    chunk_size: int = 65536,
) -> Path:
    """
    Compress *src* into a .bz2 file, streaming *chunk_size* bytes at a time.

    Example:
        path = compress_file("server.log")  # → "server.log.bz2"
    """
    source = Path(src)
    # Default destination: the source path with ".bz2" appended.
    target = Path(dest) if dest else Path(f"{source}.bz2")
    with source.open("rb") as reader, bz2.open(target, "wb", compresslevel=level) as writer:
        # Sentinel iteration: read() returns b"" at EOF, which ends the loop.
        for block in iter(lambda: reader.read(chunk_size), b""):
            writer.write(block)
    if remove_src:
        source.unlink()
    return target
def decompress_file(
    src: str | Path,
    dest: str | Path | None = None,
    remove_src: bool = False,
    chunk_size: int = 65536,
) -> Path:
    """
    Decompress a .bz2 file, streaming *chunk_size* bytes at a time.

    Args:
        src: Path to the compressed input file.
        dest: Output path; defaults to *src* with its ".bz2" suffix removed.
        remove_src: Delete the source file after successful decompression.
        chunk_size: Bytes read per iteration (keeps memory bounded).

    Returns:
        Path of the decompressed output file.

    Raises:
        ValueError: If *dest* is None and *src* does not end in ".bz2" —
            the derived output path would equal the input path, and opening
            it "wb" would truncate the very file we are about to read.

    Example:
        path = decompress_file("archive.tar.bz2")  # → "archive.tar"
    """
    src_path = Path(src)
    if dest is not None:
        dst_path = Path(dest)
    else:
        name = str(src_path)
        if not name.endswith(".bz2"):
            # Previously this silently fell through with dst == src, which
            # truncated the source file on open("wb"). Fail loudly instead.
            raise ValueError(
                f"cannot derive output name from {name!r} "
                "(no .bz2 suffix); pass dest= explicitly"
            )
        dst_path = Path(name[: -len(".bz2")])
    with bz2.open(src_path, "rb") as fin, dst_path.open("wb") as fout:
        # Sentinel iteration: read() returns b"" at end of stream.
        for chunk in iter(lambda: fin.read(chunk_size), b""):
            fout.write(chunk)
    if remove_src:
        src_path.unlink()
    return dst_path
def read_bz2_text(path: str | Path, encoding: str = "utf-8") -> str:
    """Return the full decoded contents of a .bz2-compressed text file."""
    # "rt" mode layers text decoding on top of the decompression stream.
    with bz2.open(str(path), "rt", encoding=encoding) as handle:
        return handle.read()
def write_bz2_text(path: str | Path, text: str,
                   encoding: str = "utf-8", level: int = 9) -> None:
    """Encode *text* and write it as a .bz2-compressed file at *path*."""
    # "wt" mode encodes the string before feeding the compressor.
    with bz2.open(str(path), "wt", encoding=encoding, compresslevel=level) as handle:
        handle.write(text)
def read_bz2_lines(path: str | Path, encoding: str = "utf-8") -> list[str]:
    """
    Return all lines (line endings kept) of a .bz2-compressed text file.

    Note: this materializes the whole file as a list — it is not a lazy
    generator, despite what the previous docstring claimed.

    Args:
        path: Path to the compressed text file.
        encoding: Text encoding used to decode the decompressed bytes.

    Example:
        lines = read_bz2_lines("access.log.bz2")
    """
    with bz2.open(str(path), "rt", encoding=encoding) as f:
        return f.readlines()
# ─────────────────────────────────────────────────────────────────────────────
# 3. Incremental streaming
# ─────────────────────────────────────────────────────────────────────────────
def compress_stream(
source: Iterator[bytes],
level: int = 9,
) -> Generator[bytes, None, None]:
"""
Compress a stream of byte chunks lazily.
Example:
with open("big.log", "rb") as fin, open("big.log.bz2", "wb") as fout:
chunks = iter(lambda: fin.read(65536), b"")
for c in compress_stream(chunks):
fout.write(c)
"""
comp = bz2.BZ2Compressor(compresslevel=level)
for chunk in source:
out = comp.compress(chunk)
if out:
yield out
final = comp.flush()
if final:
yield final
def decompress_stream(source: Iterator[bytes]) -> Generator[bytes, None, None]:
    """
    Lazily decompress a single bzip2 stream from an iterable of byte chunks.

    Example:
        with open("data.bz2", "rb") as f:
            chunks = iter(lambda: f.read(65536), b"")
            for chunk in decompress_stream(chunks):
                process(chunk)
    """
    engine = bz2.BZ2Decompressor()
    for piece in source:
        # Single-stream contract: once the end-of-stream marker is seen,
        # any remaining input chunks are ignored.
        if engine.eof:
            break
        produced = engine.decompress(piece)
        if produced:
            yield produced
def decompress_multi_stream(data: bytes) -> bytes:
    """
    Decompress bzip2 data that may hold several concatenated streams.

    Example:
        full = decompress_multi_stream(concatenated_bz2_data)
    """
    pieces: list[bytes] = []
    offset = 0
    total = len(data)
    while offset < total:
        # BZ2Decompressor handles exactly one stream; start fresh each time.
        engine = bz2.BZ2Decompressor()
        pieces.append(engine.decompress(data[offset:]))
        if not engine.eof:
            break  # final stream was truncated — stop with what we have
        # unused_data is everything after the stream's end marker.
        offset = total - len(engine.unused_data)
    return b"".join(pieces)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Benchmarking
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class BZ2BenchResult:
    """Timing/size result of one bz2 benchmark run at a single level."""
    level: int            # compresslevel used (1-9)
    original_size: int    # input payload size in bytes
    compressed_size: int  # compressed output size in bytes
    compress_ms: float    # wall-clock compression time, milliseconds
    decompress_ms: float  # wall-clock decompression time, milliseconds

    @property
    def ratio(self) -> float:
        """Original/compressed ratio; 0.0 when compressed_size is zero."""
        if not self.compressed_size:
            return 0.0
        return self.original_size / self.compressed_size

    def __str__(self) -> str:
        segments = (
            f"level={self.level}",
            f"{self.compressed_size:8,d} B",
            f"ratio={self.ratio:.2f}x",
            f"cmp={self.compress_ms:.1f}ms",
            f"dcmp={self.decompress_ms:.1f}ms",
        )
        return " ".join(segments)
def benchmark(data: bytes, levels: list[int] | None = None) -> list[BZ2BenchResult]:
    """
    Benchmark bz2 compression and decompression at several compresslevels.

    Args:
        data: Payload compressed once per level.
        levels: Levels to test; defaults to [1, 4, 7, 9] when None or empty.

    Returns:
        One BZ2BenchResult per level, in the order given.

    Example:
        for r in benchmark(sample_data, levels=[1, 5, 9]):
            print(r)
    """
    targets = levels or [1, 4, 7, 9]
    results: list[BZ2BenchResult] = []
    for lvl in targets:
        # perf_counter() is the documented clock for short benchmarks: it has
        # the highest available resolution, whereas monotonic() can be coarse
        # on some platforms and under-resolve sub-millisecond runs.
        t0 = time.perf_counter()
        compressed = compress(data, lvl)
        cmp_ms = (time.perf_counter() - t0) * 1000
        t0 = time.perf_counter()
        decompress(compressed)
        dcmp_ms = (time.perf_counter() - t0) * 1000
        results.append(BZ2BenchResult(
            level=lvl,
            original_size=len(data),
            compressed_size=len(compressed),
            compress_ms=cmp_ms,
            decompress_ms=dcmp_ms,
        ))
    return results
# ─────────────────────────────────────────────────────────────────────────────
# 5. In-memory compressed buffer
# ─────────────────────────────────────────────────────────────────────────────
class BZ2Buffer:
"""
In-memory bz2 reader/writer wrapping a BytesIO stream.
Use as a file-like interface to a compressed in-memory buffer.
Example:
buf = BZ2Buffer()
buf.write(b"line 1\\n")
buf.write(b"line 2\\n")
data = buf.getvalue() # compressed bytes
original = decompress(data)
"""
def __init__(self, level: int = 9) -> None:
self._raw = io.BytesIO()
self._comp = bz2.BZ2Compressor(compresslevel=level)
self._closed = False
def write(self, data: bytes) -> int:
chunk = self._comp.compress(data)
if chunk:
self._raw.write(chunk)
return len(data)
def flush(self) -> bytes:
"""Flush remaining compressed data and return all compressed bytes."""
if not self._closed:
final = self._comp.flush()
if final:
self._raw.write(final)
self._closed = True
return self._raw.getvalue()
def getvalue(self) -> bytes:
return self.flush()
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Smoke-test demo exercising every helper in this module; it writes only
    # to a temporary directory and prints results for manual inspection.
    import tempfile
    print("=== bz2 demo ===")
    # Payload mixes repetitive ASCII (compresses well) with raw 0..255 byte
    # runs (compresses poorly) so the printed ratios are realistic.
    sample = (b"Python bz2 compression demo. " * 300 +
              bytes(range(256)) * 30)
    # ── one-shot ──────────────────────────────────────────────────────────────
    print("\n--- compress / decompress ---")
    compressed = compress(sample)
    print(f" original: {len(sample):8,d} bytes")
    print(f" compressed: {len(compressed):8,d} bytes ratio={compress_ratio(sample):.2f}x")
    print(f" roundtrip: {decompress(compressed) == sample}")
    # ── file ops ──────────────────────────────────────────────────────────────
    print("\n--- compress_file / decompress_file ---")
    with tempfile.TemporaryDirectory() as tmpdir:
        src = Path(tmpdir) / "data.bin"
        src.write_bytes(sample)
        bz2_path = compress_file(src, level=6)
        print(f" {src.name} ({src.stat().st_size:,d}) → {bz2_path.name} ({bz2_path.stat().st_size:,d})")
        out = decompress_file(bz2_path, Path(tmpdir) / "data_out.bin")
        print(f" decompressed ok: {out.read_bytes() == sample}")
    # ── streaming ─────────────────────────────────────────────────────────────
    print("\n--- compress_stream / decompress_stream ---")
    # Feed the compressor 8 KiB slices to exercise the incremental path.
    chunks = [sample[i:i+8192] for i in range(0, len(sample), 8192)]
    stream_out = b"".join(compress_stream(iter(chunks), level=1))
    stream_in = b"".join(decompress_stream(iter([stream_out])))
    print(f" streamed: {len(sample):,d} → {len(stream_out):,d} bytes")
    print(f" roundtrip ok: {stream_in == sample}")
    # ── multi-stream ──────────────────────────────────────────────────────────
    print("\n--- decompress_multi_stream ---")
    # Two independently-compressed streams concatenated back to back.
    part1 = compress(b"stream one; ")
    part2 = compress(b"stream two!")
    multi = decompress_multi_stream(part1 + part2)
    print(f" multi-stream result: {multi!r}")
    # ── benchmark ─────────────────────────────────────────────────────────────
    print("\n--- benchmark ---")
    for r in benchmark(sample, levels=[1, 5, 9]):
        print(f" {r}")
    # ── BZ2Buffer ─────────────────────────────────────────────────────────────
    print("\n--- BZ2Buffer ---")
    buf = BZ2Buffer(level=6)
    for i in range(10):
        buf.write(f"line {i}: ".encode() + b"data " * 20 + b"\n")
    compressed_buf = buf.getvalue()  # finalizes the compressed stream
    print(f" buffer compressed to {len(compressed_buf):,d} bytes")
    print(f" roundtrip ok: {decompress(compressed_buf).startswith(b'line 0:')}")
    print("\n=== done ===")
For the gzip alternative — gzip implements DEFLATE and offers the same gzip.compress() / gzip.open() / gzip.GzipFile API pattern as bz2; it compresses faster but produces larger files; .gz files are universally supported — use gzip when decompression speed or broad tool compatibility matters most; use bz2 when you need better-than-gzip ratios and .bz2 / .tar.bz2 is the expected format. For the lzma alternative — lzma (stdlib) provides LZMA/XZ compression with the best ratio of the three but the slowest compression speed; .xz is the format used by modern Linux distribution packages — use lzma for archival and distribution packaging where the archive will be downloaded many times (each byte saved multiplies); use bz2 when existing tooling or pipelines expect the .bz2 format — note that xz typically decompresses faster than bzip2, so lzma is also the stronger choice when decompression latency matters. The Claude Skills 360 bundle includes bz2 skill sets covering compress()/decompress()/compress_ratio() one-shot helpers, compress_file()/decompress_file()/read_bz2_text()/write_bz2_text()/read_bz2_lines() file operations, compress_stream()/decompress_stream()/decompress_multi_stream() lazy streaming, BZ2BenchResult dataclass with benchmark(), and BZ2Buffer in-memory compressed writer. Start with the free tier to try BZip2 compression patterns and bz2 pipeline code generation.